From f70b6271ab13eb4afc2bb46e1529a225e1948d30 Mon Sep 17 00:00:00 2001
From: Radostin Stoyanov
Date: Tue, 5 Nov 2024 14:31:26 +0000
Subject: [PATCH 01/12] tls: enable support for image encryption

This patch extends CRIU dump with support for encryption of images using
ChaCha20-Poly1305 authenticated encryption in combination with X.509
certificates. The '--encrypt' option can be used with the dump/pre-dump
commands to enable this functionality. When this option is specified
during dump, the GnuTLS library is used to load a public key from an
X.509 certificate, and to generate a 256-bit random `token`. The token's
value is then encrypted with the public key and the corresponding
ciphertext is saved in `cipher.img`.

During restore, if cipher.img exists in the images directory, the GnuTLS
library is used to load a private key from a corresponding PEM file to
decrypt the token value. The token value is used with ChaCha20-Poly1305
to encrypt/decrypt all other CRIU images.

The 256-bit token is used in combination with a 96-bit `nonce` and a
128-bit `tag` to protect data confidentiality and provide message
authentication for each data entry.

Example:
  criu dump --encrypt ...
  criu restore ...

Signed-off-by: Radostin Stoyanov --- Documentation/criu.txt | 8 + criu/config.c | 8 +- criu/cr-dump.c | 14 ++ criu/cr-restore.c | 4 + criu/crtools.c | 3 + criu/image-desc.c | 1 + criu/image.c | 117 +++++++-- criu/include/cr_options.h | 3 + criu/include/image-desc.h | 1 + criu/include/image.h | 19 +- criu/include/magic.h | 1 + criu/include/protobuf-desc.h | 1 + criu/include/tls.h | 19 ++ criu/ipc_ns.c | 4 +- criu/net.c | 10 +- criu/protobuf-desc.c | 1 + criu/protobuf.c | 67 +++++- criu/sk-queue.c | 2 +- criu/sk-tcp.c | 4 +- criu/tls.c | 443 ++++++++++++++++++++++++++++++++++- images/Makefile | 1 + images/cipher.proto | 7 + lib/pycriu/images/images.py | 1 + 23 files changed, 699 insertions(+), 40 deletions(-) create mode 100644 images/cipher.proto diff --git a/Documentation/criu.txt b/Documentation/criu.txt index 606935790b..79a1a35a79 100644 --- a/Documentation/criu.txt +++ b/Documentation/criu.txt @@ -433,6 +433,14 @@ By default the option is set to *fpu* and *ins*. option is intended for post-copy (lazy) migration and should be used in conjunction with *restore* with appropriate options. +*-e*, *--encrypt*:: + Encrypt the contents of the image files. The encryption key is + loaded from an X.509 certificate, which can be specified using + the *--tls-cert* option. The *restore* operation automatically + detects if the image files are encrypted and loads a private key + from a corresponding PEM file, which can be specified using the + *--tls-key* option. + *--file-validation* ['mode']:: Set the method to be used to validate open files. 
Validation is done to ensure that the version of the file being restored is the same diff --git a/criu/config.c b/criu/config.c index 1322a490ab..7e46e57941 100644 --- a/criu/config.c +++ b/criu/config.c @@ -617,7 +617,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, "no-" OPT_NAME, no_argument, SAVE_TO, false \ } - static const char short_opts[] = "dSsRt:hD:o:v::x::Vr:jJ:lW:L:M:"; + static const char short_opts[] = "dSseRt:hD:o:v::x::Vr:jJ:lW:L:M:"; static struct option long_opts[] = { { "tree", required_argument, 0, 't' }, { "leave-stopped", no_argument, 0, 's' }, @@ -703,6 +703,7 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, BOOL_OPT("mntns-compat-mode", &opts.mntns_compat_mode), BOOL_OPT("unprivileged", &opts.unprivileged), BOOL_OPT("ghost-fiemap", &opts.ghost_fiemap), + { "encrypt", no_argument, 0, 'e' }, {}, }; @@ -812,6 +813,9 @@ int parse_options(int argc, char **argv, bool *usage_error, bool *has_exec_cmd, } else opts.log_level++; break; + case 'e': + opts.encrypt = true; + break; case 1043: { int fd; @@ -1108,7 +1112,7 @@ int check_options(void) } #ifndef CONFIG_GNUTLS - if (opts.tls) { + if (opts.tls || opts.encrypt) { pr_err("CRIU was built without TLS support\n"); return 1; } diff --git a/criu/cr-dump.c b/criu/cr-dump.c index 1bc5d934f5..d9cc330a5d 100644 --- a/criu/cr-dump.c +++ b/criu/cr-dump.c @@ -88,6 +88,7 @@ #include "asm/dump.h" #include "timer.h" #include "sigact.h" +#include "tls.h" /* * Architectures can overwrite this function to restore register sets that @@ -1936,6 +1937,9 @@ int cr_pre_dump_tasks(pid_t pid) opts.final_state = TASK_ALIVE; } + if (tls_initialize_cipher()) + goto err; + if (init_stats(DUMP_STATS)) goto err; @@ -1991,6 +1995,9 @@ int cr_pre_dump_tasks(pid_t pid) if (irmap_predump_prep()) goto err; + if (write_img_cipher()) + goto err; + ret = 0; err: if (parent_ie) @@ -2145,6 +2152,10 @@ int cr_dump_tasks(pid_t pid) pr_err("Pre dump script failed with 
%d!\n", pre_dump_ret); goto err; } + + if (tls_initialize_cipher()) + goto err; + if (init_stats(DUMP_STATS)) goto err; @@ -2296,6 +2307,9 @@ int cr_dump_tasks(pid_t pid) if (ret) goto err; + if (write_img_cipher()) + goto err; + he.has_pre_dump_mode = false; ret = write_img_inventory(&he); diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 646300bdb8..beb63a2b24 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -80,6 +80,7 @@ #include "bpfmap.h" #include "apparmor.h" #include "pidfd.h" +#include "tls.h" #include "parasite-syscall.h" #include "files-reg.h" @@ -2360,6 +2361,9 @@ int cr_restore_tasks(void) if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE)) return -1; + if (tls_initialize_cipher_from_image()) + goto err; + if (init_stats(RESTORE_STATS)) goto err; diff --git a/criu/crtools.c b/criu/crtools.c index 6f493850b9..2d6f1653b7 100644 --- a/criu/crtools.c +++ b/criu/crtools.c @@ -547,6 +547,9 @@ int main(int argc, char *argv[], char *envp[]) " will be punched from the image\n" " --pre-dump-mode splice - parasite based pre-dumping (default)\n" " read - process_vm_readv syscall based pre-dumping\n" +#ifdef CONFIG_GNUTLS + " -e|--encrypt encrypt the contents of images\n" +#endif "\n" "Page/Service server options:\n" " --address ADDR address of server or service\n" diff --git a/criu/image-desc.c b/criu/image-desc.c index 2d87c73815..c9c278748d 100644 --- a/criu/image-desc.c +++ b/criu/image-desc.c @@ -108,6 +108,7 @@ struct cr_fd_desc_tmpl imgset_template[CR_FD_MAX] = { FD_ENTRY_F(BPFMAP_DATA, "bpfmap-data", O_NOBUF), FD_ENTRY(APPARMOR, "apparmor"), FD_ENTRY(PIDFD, "pidfd"), + FD_ENTRY(CIPHER, "cipher"), [CR_FD_STATS] = { .fmt = "stats-%s", diff --git a/criu/image.c b/criu/image.c index 9589167fb1..ee32fe1723 100644 --- a/criu/image.c +++ b/criu/image.c @@ -4,6 +4,7 @@ #include #include #include + #include "crtools.h" #include "cr_options.h" #include "imgset.h" @@ -19,6 +20,7 @@ #include "proc_parse.h" #include "img-streamer.h" #include "namespaces.h" 
+#include "tls.h"
 
 bool ns_per_id = false;
 bool img_common_magic = true;
@@ -793,61 +795,132 @@ struct cr_img *open_pages_image(unsigned long flags, struct cr_img *pmi, u32 *id
 }
 
 /*
- * Write buffer @ptr of @size bytes into @fd file
+ * Write buffer @ptr of @size bytes into @fd file.
+ * The @encrypt_data boolean flag indicates whether
+ * the data should be encrypted using ChaCha20-Poly1305.
+ *
  * Returns
  *  0 on success
  * -1 on error (error message is printed)
  */
-int write_img_buf(struct cr_img *img, const void *ptr, int size)
+int write_img_buf(struct cr_img *img, const void *ptr, int size, bool encrypt_data)
 {
-	int ret;
+	int ret, exit_code = -1;
+	char *buf = NULL;
+	chacha20_poly1305_t cipher_data;
 
-	ret = bwrite(&img->_x, ptr, size);
-	if (ret == size)
-		return 0;
+	if (!encrypt_data || !opts.encrypt || size <= 0) {
+		buf = (void *)ptr;
+	} else {
+		buf = xmalloc(size);
+		if (!buf)
+			return -1;
 
-	if (ret < 0)
-		pr_perror("Can't write img file");
-	else
-		pr_err("Img trimmed %d/%d\n", ret, size);
-	return -1;
+		/*
+		 * tls_encrypt_data() works in place and @ptr is const,
+		 * so the payload is encrypted in a private copy.
+		 */
+		memcpy(buf, ptr, size);
+
+		/* Encrypt buffer data using ChaCha20-Poly1305 */
+		ret = tls_encrypt_data(buf, size, cipher_data.tag, cipher_data.nonce);
+		if (ret < 0) {
+			pr_err("Failed to encrypt buffer data\n");
+			goto err;
+		}
+	}
+
+	ret = bwrite(&img->_x, buf, size);
+	if (ret != size) {
+		if (ret < 0)
+			pr_perror("Can't write img file");
+		else
+			pr_err("Img trimmed %d/%d\n", ret, size);
+
+		goto err;
+	}
+
+	/* On-disk layout of an encrypted entry: payload, tag, nonce */
+	if (encrypt_data && opts.encrypt && size > 0) {
+		ret = bwrite(&img->_x, cipher_data.tag, sizeof(cipher_data.tag));
+		if (ret != sizeof(cipher_data.tag)) {
+			pr_err("Failed to write tag data to image file\n");
+			goto err;
+		}
+
+		ret = bwrite(&img->_x, cipher_data.nonce, sizeof(cipher_data.nonce));
+		if (ret != sizeof(cipher_data.nonce)) {
+			pr_err("Failed to write nonce data to image file\n");
+			goto err;
+		}
+	}
+
+	exit_code = 0;
+err:
+	/* Only the private copy made for encryption is owned here */
+	if (buf != (void *)ptr)
+		xfree(buf);
+	return exit_code;
 }
 
 /*
  * Read buffer @ptr of @size bytes from @fd file
+ * The @decrypt_data boolean flag indicates whether
+ * the data should be decrypted using ChaCha20-Poly1305.
+ *
  * Returns
  *  1 on success
  *  0 on EOF (silently)
  * -1 on error (error message is printed)
  */
-int read_img_buf_eof(struct cr_img *img, void *ptr, int size)
+int do_read_img_buf_eof(struct cr_img *img, void *ptr, int size, bool decrypt_data)
 {
+	chacha20_poly1305_t cipher_data;
 	int ret;
 
 	ret = bread(&img->_x, ptr, size);
+	if (ret < 0) {
+		pr_perror("Can't read img file");
+		return -1;
+	}
+	if (ret == 0)
+		return (size == 0);
+
+	/*
+	 * Only a complete payload is authenticated and decrypted; a short
+	 * read falls through and is reported as a trimmed image below.
+	 */
+	if (ret == size && opts.encrypt && decrypt_data && size > 0) {
+		/* A partially read tag or nonce must not reach the cipher */
+		if (bread(&img->_x, cipher_data.tag, sizeof(cipher_data.tag)) != sizeof(cipher_data.tag)) {
+			pr_err("Can't read tag data\n");
+			return -1;
+		}
+
+		if (bread(&img->_x, cipher_data.nonce, sizeof(cipher_data.nonce)) != sizeof(cipher_data.nonce)) {
+			pr_err("Can't read nonce data\n");
+			return -1;
+		}
+
+		if (tls_decrypt_data(ptr, size, cipher_data.tag, cipher_data.nonce) < 0) {
+			pr_err("Can't decrypt data\n");
+			return -1;
+		}
+	}
+
 	if (ret == size)
 		return 1;
 
-	if (ret == 0)
-		return 0;
-	if (ret < 0)
-		pr_perror("Can't read img file");
-	else
-		pr_err("Img trimmed %d/%d\n", ret, size);
+	pr_err("Img trimmed %d/%d\n", ret, size);
 	return -1;
 }
 
 /*
  * Read buffer @ptr of @size bytes from @fd file
+ * The @decrypt_data boolean flag indicates whether
+ * the data should be decrypted using ChaCha20-Poly1305.
+ * * Returns * 1 on success * -1 on error or EOF (error message is printed) */ -int read_img_buf(struct cr_img *img, void *ptr, int size) +int do_read_img_buf(struct cr_img *img, void *ptr, int size, bool decrypt_data) { int ret; - ret = read_img_buf_eof(img, ptr, size); + ret = do_read_img_buf_eof(img, ptr, size, decrypt_data); if (ret == 0) { pr_err("Unexpected EOF\n"); ret = -1; @@ -870,7 +943,9 @@ int read_img_str(struct cr_img *img, char **pstr, int size) if (!str) return -1; - ret = read_img_buf(img, str, size); + /* TODO: read_img_str() is used only in do_restore_nftables() + * which does not currently use encryption. */ + ret = do_read_img_buf(img, str, size, false); if (ret < 0) { xfree(str); return -1; diff --git a/criu/include/cr_options.h b/criu/include/cr_options.h index 60cf9437e6..003230ba10 100644 --- a/criu/include/cr_options.h +++ b/criu/include/cr_options.h @@ -210,6 +210,9 @@ struct cr_options { int tls; int tls_no_cn_verify; + /* This enables encryption for CRIU images. */ + int encrypt; + /* This stores which method to use for file validation. 
*/ int file_validation_method; diff --git a/criu/include/image-desc.h b/criu/include/image-desc.h index 79e1ac1113..6d5374a841 100644 --- a/criu/include/image-desc.h +++ b/criu/include/image-desc.h @@ -6,6 +6,7 @@ enum { CR_FD_INVENTORY, CR_FD_STATS, + CR_FD_CIPHER, /* * Task entries */ diff --git a/criu/include/image.h b/criu/include/image.h index afa7d5e12f..b4ff0bfad8 100644 --- a/criu/include/image.h +++ b/criu/include/image.h @@ -167,12 +167,19 @@ extern void up_page_ids_base(void); extern struct cr_img *img_from_fd(int fd); /* for cr-show mostly */ -extern int write_img_buf(struct cr_img *, const void *ptr, int size); -#define write_img(img, ptr) write_img_buf((img), (ptr), sizeof(*(ptr))) -extern int read_img_buf_eof(struct cr_img *, void *ptr, int size); -#define read_img_eof(img, ptr) read_img_buf_eof((img), (ptr), sizeof(*(ptr))) -extern int read_img_buf(struct cr_img *, void *ptr, int size); -#define read_img(img, ptr) read_img_buf((img), (ptr), sizeof(*(ptr))) +extern int write_img_buf(struct cr_img *, const void *ptr, int size, bool encrypt_data); +/* write_img() is used only for writing image magic values */ +#define write_img(img, ptr) write_img_buf((img), (ptr), sizeof(*(ptr)), false) + +extern int do_read_img_buf_eof(struct cr_img *, void *ptr, int size, bool decrypt_data); +#define read_img_buf_eof(img, ptr, size) do_read_img_buf_eof((img), (ptr), (size), true) +#define read_img_eof(img, ptr) do_read_img_buf_eof((img), (ptr), sizeof(*(ptr)), true) + +extern int do_read_img_buf(struct cr_img *, void *ptr, int size, bool decrypt_data); +#define read_img_buf(img, ptr, size) do_read_img_buf((img), (ptr), (size), true) +/* read_img() is used only for reading image magic values */ +#define read_img(img, ptr) do_read_img_buf((img), (ptr), sizeof(*(ptr)), false) + extern int read_img_str(struct cr_img *, char **pstr, int size); extern void close_image(struct cr_img *); diff --git a/criu/include/magic.h b/criu/include/magic.h index 6f0aff26d8..105e9e1c98 
100644 --- a/criu/include/magic.h +++ b/criu/include/magic.h @@ -101,6 +101,7 @@ #define BPFMAP_DATA_MAGIC 0x64324033 /* Arkhangelsk */ #define APPARMOR_MAGIC 0x59423047 /* Nikolskoye */ #define PIDFD_MAGIC 0x54435556 /* Ufa */ +#define CIPHER_MAGIC 0x50034653 /* Pallasovka */ #define IFADDR_MAGIC RAW_IMAGE_MAGIC #define ROUTE_MAGIC RAW_IMAGE_MAGIC diff --git a/criu/include/protobuf-desc.h b/criu/include/protobuf-desc.h index c4241be557..8e95385fd6 100644 --- a/criu/include/protobuf-desc.h +++ b/criu/include/protobuf-desc.h @@ -71,6 +71,7 @@ enum { PB_BPFMAP_DATA, PB_APPARMOR, PB_PIDFD, + PB_CIPHER, /* PB_AUTOGEN_STOP */ diff --git a/criu/include/tls.h b/criu/include/tls.h index f563c092c6..8afac963af 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -1,8 +1,16 @@ #ifndef __CR_TLS_H__ #define __CR_TLS_H__ +/* 96-bits nonce and 128-bits tag for ChaCha20-Poly1305 */ +typedef struct { + uint8_t tag[16]; + uint8_t nonce[12]; +} chacha20_poly1305_t; + #ifdef CONFIG_GNUTLS +#include + int tls_x509_init(int sockfd, bool is_server); void tls_terminate_session(bool async); @@ -12,6 +20,12 @@ ssize_t tls_recv(void *buf, size_t len, int flags); int tls_send_data_from_fd(int fd, unsigned long len); int tls_recv_data_to_fd(int fd, unsigned long len); +int write_img_cipher(void); +int tls_initialize_cipher(void); +int tls_initialize_cipher_from_image(void); +int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *nonce_data); +int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *nonce_data); + #else /* CONFIG_GNUTLS */ #define tls_x509_init(sockfd, is_server) (0) @@ -20,6 +34,11 @@ int tls_recv_data_to_fd(int fd, unsigned long len); #define tls_send_data_from_fd(fd, len) (-1) #define tls_recv_data_to_fd(fd, len) (-1) #define tls_terminate_session(async) +#define tls_initialize_cipher() (0) +#define tls_initialize_cipher_from_image() (0) +#define tls_encrypt_data(data, data_size, tag_data, nonce_data) (-1) +#define 
tls_decrypt_data(data, data_size, tag_data, nonce_data) (-1) +#define write_img_cipher() (0) #endif /* CONFIG_HAS_GNUTLS */ diff --git a/criu/ipc_ns.c b/criu/ipc_ns.c index 7e95be8c52..ce6ac2d5ff 100644 --- a/criu/ipc_ns.c +++ b/criu/ipc_ns.c @@ -93,7 +93,7 @@ static int dump_ipc_sem_set(struct cr_img *img, const IpcSemEntry *sem) pr_info_ipc_sem_array(sem->nsems, values); memzero((void *)values + size, rounded - size); - ret = write_img_buf(img, values, rounded); + ret = write_img_buf(img, values, rounded, true); if (ret < 0) { pr_err("Failed to write IPC message data\n"); goto out; @@ -214,7 +214,7 @@ static int dump_ipc_msg_queue_messages(struct cr_img *img, const IpcMsgEntry *ms rounded = round_up(msg.msize, sizeof(u64)); memzero(((void *)message->mtext + msg.msize), rounded - msg.msize); - ret = write_img_buf(img, message->mtext, rounded); + ret = write_img_buf(img, message->mtext, rounded, true); if (ret < 0) { pr_err("Failed to write IPC message data\n"); break; diff --git a/criu/net.c b/criu/net.c index eee3311087..4d8734fdfe 100644 --- a/criu/net.c +++ b/criu/net.c @@ -948,7 +948,15 @@ static int dump_one_nf(struct nlmsghdr *hdr, struct ns_id *ns, void *arg) if (lazy_image(img) && open_image_lazy(img)) return -1; - if (write_img_buf(img, hdr, hdr->nlmsg_len)) + /* During restore we first read the value of nlmsg_len to determine + * the message payload length. Then we read the payload itself. + * To support encryption we need to encrypt (write) the length of + * the payload first, and then the payload itself. 
+ */ + if (write_img_buf(img, hdr, sizeof(struct nlmsghdr), true)) + return -1; + + if (write_img_buf(img, hdr + 1, hdr->nlmsg_len - sizeof(struct nlmsghdr), true)) return -1; return 0; diff --git a/criu/protobuf-desc.c b/criu/protobuf-desc.c index e0dbfccc21..7cde74741f 100644 --- a/criu/protobuf-desc.c +++ b/criu/protobuf-desc.c @@ -69,6 +69,7 @@ #include "images/bpfmap-data.pb-c.h" #include "images/apparmor.pb-c.h" #include "images/pidfd.pb-c.h" +#include "images/cipher.pb-c.h" struct cr_pb_message_desc cr_pb_descs[PB_MAX]; diff --git a/criu/protobuf.c b/criu/protobuf.c index ef78bcec73..76009ccc33 100644 --- a/criu/protobuf.c +++ b/criu/protobuf.c @@ -19,6 +19,7 @@ #include "bfd.h" #include "protobuf.h" #include "util.h" +#include "tls.h" #define image_name(img, buf) __image_name(img, buf, sizeof(buf)) static char *__image_name(struct cr_img *img, char *image_path, size_t image_path_size) @@ -53,6 +54,7 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) void *buf = (void *)&local; u32 size; int ret; + chacha20_poly1305_t cipher_data; if (!cr_pb_descs[type].pb_desc) { pr_err("Wrong object requested %d on %s\n", type, image_name(img, img_name_buf)); @@ -95,6 +97,41 @@ int do_pb_read_one(struct cr_img *img, void **pobj, int type, bool eof) goto err; } + if (opts.encrypt && type != PB_CIPHER && size > 0) { + /* Read tag data */ + ret = bread(&img->_x, cipher_data.tag, sizeof(cipher_data.tag)); + if (ret < 0) { + pr_perror("Can't read %d bytes of tag data from file %s", (int)sizeof(cipher_data.tag), + image_name(img, img_name_buf)); + goto err; + } else if (ret != sizeof(cipher_data.tag)) { + pr_perror("Read %d bytes of tag data while %d expected from %s", ret, (int)sizeof(cipher_data.tag), + image_name(img, img_name_buf)); + ret = -1; + goto err; + } + + /* Read nonce data */ + ret = bread(&img->_x, cipher_data.nonce, sizeof(cipher_data.nonce)); + if (ret < 0) { + pr_perror("Can't read %d bytes of nonce data from file %s", 
(int)sizeof(cipher_data.nonce), + image_name(img, img_name_buf)); + goto err; + } else if (ret != sizeof(cipher_data.nonce)) { + pr_perror("Read %d bytes of nonce data while %d expected from %s", ret, (int)sizeof(cipher_data.nonce), + image_name(img, img_name_buf)); + ret = -1; + goto err; + } + + /* Decrypt the content of buf */ + ret = tls_decrypt_data(buf, size, cipher_data.tag, cipher_data.nonce); + if (ret < 0) { + pr_err("Failed to decrypt object\n"); + goto err; + } + } + *pobj = cr_pb_descs[type].unpack(NULL, size, buf); if (!*pobj) { ret = -1; @@ -124,7 +161,11 @@ int pb_write_one(struct cr_img *img, void *obj, int type) void *buf = (void *)&local; u32 size, packed; int ret = -1; - struct iovec iov[2]; + int total_size = 0; + struct iovec iov[4]; + /* size + packed object */ + int iov_cnt = 2; + chacha20_poly1305_t cipher_data; if (!cr_pb_descs[type].pb_desc) { pr_err("Wrong object requested %d\n", type); @@ -147,13 +188,33 @@ int pb_write_one(struct cr_img *img, void *obj, int type) goto err; } + /* Encrypt packed object using ChaCha20-Poly1305 */ + if (opts.encrypt && size > 0 && type != PB_CIPHER && type != PB_STATS) { + ret = tls_encrypt_data(buf, size, cipher_data.tag, cipher_data.nonce); + if (ret < 0) { + pr_err("Failed to encrypt object\n"); + goto err; + } + + iov[2].iov_base = cipher_data.tag; + iov[2].iov_len = sizeof(cipher_data.tag); + iov[3].iov_base = cipher_data.nonce; + iov[3].iov_len = sizeof(cipher_data.nonce); + + /* size + packed object + tag + nonce */ + iov_cnt = 4; + total_size = iov[2].iov_len + iov[3].iov_len; + } + iov[0].iov_base = &size; iov[0].iov_len = sizeof(size); iov[1].iov_base = buf; iov[1].iov_len = size; - ret = bwritev(&img->_x, iov, 2); - if (ret != size + sizeof(size)) { + total_size += iov[0].iov_len + iov[1].iov_len; + + ret = bwritev(&img->_x, iov, iov_cnt); + if (ret != total_size) { pr_perror("Can't write %d bytes", (int)(size + sizeof(size))); goto err; } diff --git a/criu/sk-queue.c b/criu/sk-queue.c index 
dbd9d1d8bb..088238f46e 100644 --- a/criu/sk-queue.c +++ b/criu/sk-queue.c @@ -254,7 +254,7 @@ int dump_sk_queue(int sock_fd, int sock_id) goto err_set_sock; } - ret = write_img_buf(img_from_set(glob_imgset, CR_FD_SK_QUEUES), data, pe.length); + ret = write_img_buf(img_from_set(glob_imgset, CR_FD_SK_QUEUES), data, pe.length, true); if (ret < 0) { ret = -EIO; goto err_set_sock; diff --git a/criu/sk-tcp.c b/criu/sk-tcp.c index 9c8bad1c3f..4380c02e3c 100644 --- a/criu/sk-tcp.c +++ b/criu/sk-tcp.c @@ -199,7 +199,7 @@ static int dump_tcp_conn_state(struct inet_sk_desc *sk) buf = libsoccr_get_queue_bytes(socr, TCP_RECV_QUEUE, SOCCR_MEM_EXCL); if (buf) { - ret = write_img_buf(img, buf, tse.inq_len); + ret = write_img_buf(img, buf, tse.inq_len, true); if (ret < 0) goto err_close; @@ -208,7 +208,7 @@ static int dump_tcp_conn_state(struct inet_sk_desc *sk) buf = libsoccr_get_queue_bytes(socr, TCP_SEND_QUEUE, SOCCR_MEM_EXCL); if (buf) { - ret = write_img_buf(img, buf, tse.outq_len); + ret = write_img_buf(img, buf, tse.outq_len, true); if (ret < 0) goto err_close; diff --git a/criu/tls.c b/criu/tls.c index 3d365e21d1..1766ca5358 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -1,10 +1,18 @@ #include #include #include +#include +#include #include #include +#include +#include +#include +#include "imgset.h" +#include "images/cipher.pb-c.h" +#include "protobuf.h" #include "cr_options.h" #include "xmalloc.h" @@ -26,11 +34,24 @@ #define tls_perror(msg, ret) pr_err("%s: %s\n", msg, gnutls_strerror(ret)) +#define cleanup_gnutls_datum __attribute__((cleanup(_cleanup_gnutls_datum))) +static inline void _cleanup_gnutls_datum(gnutls_datum_t *p) +{ + if (p->data != NULL) { + gnutls_free(p->data); + } +} + static gnutls_session_t session; static gnutls_certificate_credentials_t x509_cred; +static gnutls_pubkey_t pubkey; static int tls_sk = -1; static int tls_sk_flags = 0; +/* 256-bits key for ChaCha20-Poly1305 */ +static uint8_t token[32]; +static const gnutls_cipher_algorithm_t 
stream_cipher_algorithm = GNUTLS_CIPHER_CHACHA20_POLY1305;
+
 void tls_terminate_session(bool async)
 {
 	int ret;
@@ -95,8 +116,7 @@ int tls_send_data_from_fd(int fd, unsigned long len)
 {
 	ssize_t copied;
 	unsigned long buf_size = min(len, (unsigned long)SPLICE_BUF_SZ_MAX);
-	void *buf = xmalloc(buf_size);
-
+	char *buf = xmalloc(buf_size);
 	if (!buf)
 		return -1;
 
@@ -402,3 +422,422 @@ int tls_x509_init(int sockfd, bool is_server)
 	tls_terminate_session(true);
 	return -1;
 }
+
+/* Fill the global token with fresh random key material. */
+static inline int _tls_generate_token(void)
+{
+	return gnutls_rnd(GNUTLS_RND_KEY, &token, sizeof(token));
+}
+
+/**
+ * tls_initialize_cipher loads the public key from an X.509
+ * certificate (opts.tls_cert if set, CRIU_CERT otherwise) and
+ * generates the random token that is used to encrypt the content
+ * of images during dump and pre-dump. It does nothing when
+ * encryption was not requested.
+ *
+ * Returns 0 on success and -1 on error (error message is printed).
+ * On error the global public key is released and reset to NULL.
+ */
+int tls_initialize_cipher(void)
+{
+	int ret, exit_code = -1;
+	char *cert_file_path = CRIU_CERT;
+	gnutls_x509_crt_t crt;
+	cleanup_gnutls_datum gnutls_datum_t cert_data = { NULL, 0 };
+
+	if (!opts.encrypt)
+		return 0;
+
+	if (opts.tls_cert)
+		cert_file_path = opts.tls_cert;
+
+	pr_debug("Loading public key from %s\n", cert_file_path);
+	ret = gnutls_load_file(cert_file_path, &cert_data);
+	if (ret < 0) {
+		tls_perror("Failed to load certificate file", ret);
+		return -1;
+	}
+
+	ret = gnutls_pubkey_init(&pubkey);
+	if (ret < 0) {
+		tls_perror("Failed to initialize public key", ret);
+		/* write_img_cipher() relies on a NULL pubkey meaning "not loaded" */
+		pubkey = NULL;
+		return -1;
+	}
+
+	ret = gnutls_x509_crt_init(&crt);
+	if (ret < 0) {
+		tls_perror("Failed to initialize X.509 certificate structure", ret);
+		goto err_pubkey;
+	}
+
+	ret = gnutls_x509_crt_import(crt, &cert_data, GNUTLS_X509_FMT_PEM);
+	if (ret < 0) {
+		tls_perror("Failed to import certificate", ret);
+		goto err_crt;
+	}
+
+	ret = gnutls_pubkey_import_x509(pubkey, crt, 0);
+	if (ret < 0) {
+		tls_perror("Failed to load public key", ret);
+		goto err_crt;
+	}
+
+	ret = _tls_generate_token();
+	if (ret < 0) {
+		tls_perror("Failed to generate token", ret);
+		goto err_crt;
+	}
+
+	exit_code = 0;
+err_crt:
+	/* The certificate is only needed while importing the public key */
+	gnutls_x509_crt_deinit(crt);
+err_pubkey:
+	if (exit_code) {
+		gnutls_pubkey_deinit(pubkey);
+		pubkey = NULL;
+	}
+	return exit_code;
+}
+
+static int read_fp(FILE *fp,
void *buf, const size_t buf_len)
+{
+	size_t len_read;
+
+	len_read = fread(buf, 1, buf_len, fp);
+	if (len_read != buf_len) {
+		pr_perror("Unable to read file (read:%zu buf_len:%zu)", len_read, buf_len);
+		return -EIO;
+	}
+	return 0;
+}
+
+/* Read exactly @buf_len bytes of @file_path into @buf; 0 or negative errno-style code. */
+static int read_file(const char *file_path, void *buf, const size_t buf_len)
+{
+	int ret;
+	FILE *fp;
+
+	fp = fopen(file_path, "r");
+	if (!fp) {
+		/* Capture errno before logging can change it */
+		ret = -errno;
+		pr_perror("Cannot fopen %s", file_path);
+		return ret;
+	}
+
+	ret = read_fp(fp, buf, buf_len);
+	fclose(fp); /* this will also close fd */
+	return ret;
+}
+
+/**
+ * rsa_load_pem_key loads a private key from a file in PEM format.
+ * It returns a pointer to the key on success or NULL on error. The
+ * caller is responsible for freeing the key. This function is based
+ * on code from wireshark:
+ * https://github.com/wireshark/wireshark/blob/24c8d79d/wsutil/rsa.c#L89
+ */
+static gnutls_x509_privkey_t rsa_load_pem_key(const char *privkey_file_path)
+{
+	gnutls_x509_privkey_t x509_key;
+	gnutls_datum_t key;
+	struct stat statbuf;
+	int ret;
+
+	if (stat(privkey_file_path, &statbuf) < 0) {
+		pr_perror("Can't stat %s", privkey_file_path);
+		return NULL;
+	}
+
+	if (S_ISDIR(statbuf.st_mode)) {
+		pr_err("%s is a directory\n", privkey_file_path);
+		return NULL;
+	}
+
+	if (S_ISFIFO(statbuf.st_mode)) {
+		pr_err("%s is a FIFO\n", privkey_file_path);
+		return NULL;
+	}
+
+	if (!S_ISREG(statbuf.st_mode)) {
+		pr_err("%s is not a regular file\n", privkey_file_path);
+		return NULL;
+	}
+
+	key.data = gnutls_malloc((size_t)statbuf.st_size);
+	if (!key.data) {
+		pr_perror("Can't allocate %lu bytes for %s", (unsigned long)statbuf.st_size, privkey_file_path);
+		return NULL;
+	}
+	key.size = (int)statbuf.st_size;
+
+	ret = read_file(privkey_file_path, key.data, key.size);
+	if (ret < 0) {
+		goto err;
+	}
+
+	ret = gnutls_x509_privkey_init(&x509_key);
+	if (ret != GNUTLS_E_SUCCESS) {
+		tls_perror("Failed to initialize X.509 private key", ret);
+		goto err;
+	}
+
+	ret = gnutls_x509_privkey_import(x509_key, &key, GNUTLS_X509_FMT_PEM);
+	if (ret != GNUTLS_E_SUCCESS) {
+		tls_perror("Failed to load X.509 private key", ret);
+		goto err_key;
+	}
+
+	if (gnutls_x509_privkey_get_pk_algorithm(x509_key) != GNUTLS_PK_RSA) {
+		pr_err("Private key is not RSA\n");
+		goto err_key;
+	}
+
+	goto out;
+
+err_key:
+	/* The key object was initialized but is not handed to the caller */
+	gnutls_x509_privkey_deinit(x509_key);
+err:
+	x509_key = NULL;
+out:
+	/* The buffer holds the PEM private key: wipe it before freeing */
+	gnutls_memset(key.data, 0, key.size);
+	gnutls_free(key.data);
+	return x509_key;
+}
+
+/**
+ * tls_initialize_cipher_from_image loads a private key and
+ * decrypts the token from the cipher.img that is then used
+ * to decrypt all other images. An empty cipher.img means the
+ * images are not encrypted; opts.encrypt is set accordingly.
+ *
+ * Returns 0 on success and -1 on error (error message is printed).
+ */
+int tls_initialize_cipher_from_image(void)
+{
+	int ret, exit_code = -1;
+	char *privkey_file_path = CRIU_KEY;
+	struct cr_img *img;
+	CipherEntry *ce = NULL;
+
+	gnutls_privkey_t privkey = NULL;
+	gnutls_x509_privkey_t x509_key = NULL;
+	gnutls_datum_t ciphertext;
+	gnutls_datum_t decrypted_token = { NULL, 0 };
+
+	img = open_image(CR_FD_CIPHER, O_RSTR);
+	if (!img)
+		return -1;
+
+	/* If cipher.img is empty, then encryption is not used */
+	if (empty_image(img)) {
+		close_image(img);
+		opts.encrypt = false;
+		return 0;
+	}
+	opts.encrypt = true;
+
+	if (opts.tls_key)
+		privkey_file_path = opts.tls_key;
+
+	pr_debug("Loading private key from %s\n", privkey_file_path);
+	x509_key = rsa_load_pem_key(privkey_file_path);
+	if (!x509_key)
+		goto out;
+
+	ret = gnutls_privkey_init(&privkey);
+	if (ret < 0) {
+		tls_perror("Failed to initialize private key", ret);
+		privkey = NULL;
+		goto out;
+	}
+
+	/* Imported with flags 0, so x509_key is still released by this function */
+	ret = gnutls_privkey_import_x509(privkey, x509_key, 0);
+	if (ret < 0) {
+		tls_perror("Failed to import private key", ret);
+		goto out;
+	}
+
+	ret = pb_read_one(img, &ce, PB_CIPHER);
+	if (ret < 0 || !ce) {
+		pr_err("Failed to read cipher entry\n");
+		goto out;
+	}
+
+	ciphertext.data = ce->token.data;
+	ciphertext.size = ce->token.len;
+
+	ret = gnutls_privkey_decrypt_data(privkey, 0, &ciphertext, &decrypted_token);
+	if (ret < 0) {
+		tls_perror("Failed to decrypt token data", ret);
+		goto out;
+	}
+
+	/* A token of the wrong size must fail the restore, not be ignored */
+	if (decrypted_token.size != sizeof(token)) {
+		pr_err("Invalid token size (%u != %zu)\n", decrypted_token.size, sizeof(token));
+		goto out;
+	}
+
+	memcpy(token, decrypted_token.data, sizeof(token));
+	exit_code = 0;
+out:
+	if (decrypted_token.data) {
+		/* Temporary plaintext copy of the key: wipe it before freeing */
+		gnutls_memset(decrypted_token.data, 0, decrypted_token.size);
+		gnutls_free(decrypted_token.data);
+	}
+	if (ce)
+		cipher_entry__free_unpacked(ce, NULL);
+	if (privkey)
+		gnutls_privkey_deinit(privkey);
+	if (x509_key)
+		gnutls_x509_privkey_deinit(x509_key);
+	close_image(img);
+	return exit_code;
+}
+
+/**
+ * _encrypt_data_with_pubkey encrypts the given plaintext with the
+ * given public key and returns the ciphertext. On success, it
+ * returns zero or a negative error code on error.
+ */
+static int _encrypt_data_with_pubkey(gnutls_datum_t *plaintext, gnutls_datum_t *ciphertext)
+{
+	unsigned int max_block_size, key_len = 0;
+	int ret;
+
+	ret = gnutls_pubkey_get_pk_algorithm(pubkey, &key_len);
+	if (ret < 0) {
+		pr_err("Failed to read public key length\n");
+		return -1;
+	}
+	if (ret != GNUTLS_PK_RSA) {
+		pr_err("Public key must be RSA\n");
+		return -1;
+	}
+
+	/* The data must be small enough to use plain RSA
+	 * https://github.com/gnutls/nettle/blob/fe7ae87d/pkcs1-encrypt.c#L66
+	 */
+	max_block_size = key_len / 8 - 11;
+	if (plaintext->size > max_block_size) {
+		pr_err("Data size must be less than %u bytes\n", max_block_size);
+		return -1;
+	}
+
+	ret = gnutls_pubkey_encrypt_data(pubkey, 0, plaintext, ciphertext);
+	if (ret < 0) {
+		tls_perror("Failed to encrypt data", ret);
+		return -1;
+	}
+
+	return 0;
+}
+
+/**
+ * write_img_cipher encrypts the token with RSA public key and writes
+ * it to cipher.img.
+ */
+int write_img_cipher(void)
+{
+	int ret = -1;
+	struct cr_img *img;
+	CipherEntry ce = CIPHER_ENTRY__INIT;
+	gnutls_datum_t plaintext, ciphertext = { NULL, 0 };
+
+	/* Nothing to write when the images are not encrypted */
+	if (!opts.encrypt)
+		return 0;
+
+	if (!pubkey) {
+		pr_err("Public key is not initialized\n");
+		return -1;
+	}
+
+	plaintext.data = token;
+	plaintext.size = sizeof(token);
+	if (_encrypt_data_with_pubkey(&plaintext, &ciphertext) < 0)
+		return -1;
+
+	/* ce only borrows the ciphertext buffer; it is freed below */
+	ce.token.len = ciphertext.size;
+	ce.token.data = ciphertext.data;
+
+	pr_debug("Writing cipher image\n");
+	img = open_image(CR_FD_CIPHER, O_DUMP);
+	if (!img)
+		goto out_free;
+
+	ret = pb_write_one(img, &ce, PB_CIPHER);
+	if (ret < 0)
+		pr_err("Failed to write ciphertext size to image\n");
+
+	close_image(img);
+out_free:
+	/* Reached on every path after encryption, including open_image() failure */
+	gnutls_free(ciphertext.data);
+	return ret;
+}
+
+/**
+ * tls_encrypt_data performs in-place encryption of data with ChaCha20-Poly1305
+ * AEAD cipher. A 16-bytes tag and 12-bytes nonce are generated and written to
+ * tag_data and nonce_data. The caller must ensure that there is enough space
+ * allocated for the tag and nonce. Different tag and nonce are used for each
+ * invocation, and they must be provided to decrypt the data. The tag's purpose
+ * is to verify the integrity of the data, while the nonce is used to prevent
+ */ +int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *nonce_data) +{ + int ret; + giovec_t iov[1]; + gnutls_datum_t key; + static gnutls_aead_cipher_hd_t handle = NULL; + size_t tag_size = gnutls_cipher_get_tag_size(stream_cipher_algorithm); + size_t nonce_len = gnutls_cipher_get_iv_size(stream_cipher_algorithm); + + if (!opts.encrypt) + return -1; + + if (handle == NULL) { + key.data = token; + key.size = gnutls_cipher_get_key_size(stream_cipher_algorithm); + + ret = gnutls_aead_cipher_init(&handle, stream_cipher_algorithm, &key); + if (ret < 0) { + tls_perror("Failed to initialize cipher", ret); + return -1; + } + } + + /* A different 96-bit nonce must be used for each invocation. + * The nonce should never be reused with the same key. + * (RFC 8439, Section 2.8 "AEAD Construction") + */ + ret = gnutls_rnd(GNUTLS_RND_NONCE, nonce_data, nonce_len); + if (ret < 0) { + tls_perror("Failed to generate random nonce", ret); + return -1; + } + + iov[0].iov_base = data; + iov[0].iov_len = data_size; + + ret = gnutls_aead_cipher_encryptv2(handle, nonce_data, nonce_len, NULL, 0, iov, 1, tag_data, &tag_size); + if (ret < 0) { + tls_perror("Failed to encrypt data", ret); + return -1; + } + + return 0; +} + +/** + * tls_decrypt_data performs in-place decryption of given data + * with corresponding tag and nonce. On success, it returns zero + * or a negative error code on error. 
+ */
+int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *nonce_data)
+{
+	int ret;
+	giovec_t iov[1];
+	gnutls_datum_t key;
+	gnutls_aead_cipher_hd_t handle = NULL;
+	size_t tag_size = gnutls_cipher_get_tag_size(stream_cipher_algorithm);
+	size_t nonce_len = gnutls_cipher_get_iv_size(stream_cipher_algorithm);
+
+	key.data = token;
+	key.size = gnutls_cipher_get_key_size(stream_cipher_algorithm);
+
+	/* A fresh AEAD handle is created for this call and released before returning. */
+	ret = gnutls_aead_cipher_init(&handle, stream_cipher_algorithm, &key);
+	if (ret < 0) {
+		tls_perror("Failed to initialize cipher", ret);
+		return -1;
+	}
+
+	iov[0].iov_base = data;
+	iov[0].iov_len = data_size;
+
+	ret = gnutls_aead_cipher_decryptv2(handle, nonce_data, nonce_len, NULL, 0, iov, 1, tag_data, tag_size);
+	if (ret < 0) {
+		tls_perror("Failed to decrypt data", ret);
+		/* Fall through so that the handle is released on failure too. */
+		ret = -1;
+	}
+
+	gnutls_aead_cipher_deinit(handle);
+
+	return ret;
+}
\ No newline at end of file
diff --git a/images/Makefile b/images/Makefile
index 1e40b8a8f0..6f3fadd9f1 100644
--- a/images/Makefile
+++ b/images/Makefile
@@ -75,6 +75,7 @@ proto-obj-y += bpfmap-data.o
 proto-obj-y += apparmor.o
 proto-obj-y += rseq.o
 proto-obj-y += pidfd.o
+proto-obj-y += cipher.o
 
 CFLAGS += -iquote $(obj)/
 
diff --git a/images/cipher.proto b/images/cipher.proto
new file mode 100644
index 0000000000..801043bc51
--- /dev/null
+++ b/images/cipher.proto
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: MIT
+
+syntax = "proto2";
+
+message cipher_entry {
+	required bytes token = 1;
+}
diff --git a/lib/pycriu/images/images.py b/lib/pycriu/images/images.py
index 9db506e1ee..c5bfb7988c 100644
--- a/lib/pycriu/images/images.py
+++ b/lib/pycriu/images/images.py
@@ -555,6 +555,7 @@ def skip(self, f, pbuff):
     'BPFMAP_DATA': entry_handler(pb.bpfmap_data_entry,
                                  bpfmap_data_extra_handler()),
     'APPARMOR': entry_handler(pb.apparmor_entry),
+    'CIPHER': entry_handler(pb.cipher_entry),
 }
 
 
From 9bc302fc6d2ee039c12e49e96e53417053bea252 Mon Sep 17 00:00:00 2001
From: Radostin Stoyanov
Date: Sun, 15 Dec 2024
13:08:50 +0000 Subject: [PATCH 02/12] zdtm: enable tests with encrypted images This patch extends ZDTM to run `criu dump` with the `--encrypt` option to test the encryption functionality of CRIU images. Signed-off-by: Radostin Stoyanov --- .github/workflows/encrypted-images.yml | 17 +++++++++++++++++ scripts/ci/run-ci-tests.sh | 9 +++++++++ test/zdtm.py | 20 ++++++++++++-------- 3 files changed, 38 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/encrypted-images.yml diff --git a/.github/workflows/encrypted-images.yml b/.github/workflows/encrypted-images.yml new file mode 100644 index 0000000000..3f78e8f4c0 --- /dev/null +++ b/.github/workflows/encrypted-images.yml @@ -0,0 +1,17 @@ +name: Encrypted Images Test + +on: [push, pull_request] + +# Cancel any preceding run on the pull request. +concurrency: + group: encrypted-images-test-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: ${{ github.ref != 'refs/heads/criu-dev' }} + +jobs: + build: + runs-on: ubuntu-22.04 + + steps: + - uses: actions/checkout@v2 + - name: Run CRIU Encrypted Images Test + run: sudo -E make -C scripts/ci local ENCRYPTED_IMAGES_TEST=1 diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh index b472e954c2..f64c222ca4 100755 --- a/scripts/ci/run-ci-tests.sh +++ b/scripts/ci/run-ci-tests.sh @@ -72,6 +72,10 @@ test_stream() { ./test/zdtm.py run --stream -p 2 --keep-going -a "${STREAM_TEST_EXCLUDE[@]}" "${ZDTM_OPTS[@]}" } +test_encrypted_images() { + ./test/zdtm.py run -a --keep-going --encrypt "${TEST_EXCLUDE[@]}" "${ZDTM_OPTS[@]}" +} + print_header() { echo "############### $1 ###############" } @@ -213,6 +217,11 @@ if [ "${STREAM_TEST}" = "1" ]; then exit 0 fi +if [ "${ENCRYPTED_IMAGES_TEST}" = "1" ]; then + test_encrypted_images + exit 0 +fi + ./test/zdtm.py run -a -p 2 --keep-going "${ZDTM_OPTS[@]}" if criu/criu check --feature move_mount_set_group; then ./test/zdtm.py run -a -p 2 --mntns-compat-mode --keep-going "${ZDTM_OPTS[@]}" 
diff --git a/test/zdtm.py b/test/zdtm.py
index 37ebe63b7b..040004b2e6 100755
--- a/test/zdtm.py
+++ b/test/zdtm.py
@@ -1137,7 +1137,8 @@ def __init__(self, opts):
         self.__page_server_p = None
         self.__dump_process = None
         self.__img_streamer_process = None
-        self.__tls = self.__tls_options() if opts['tls'] else []
+        self.__tls = ['--tls'] + self.__tls_options() if opts['tls'] else []
+        self.__encrypt = ['--encrypt'] + self.__tls_options() if opts['encrypt'] else []
         self.__criu_bin = opts['criu_bin']
         self.__crit_bin = opts['crit_bin']
         self.__pre_dump_mode = opts['pre_dump_mode']
@@ -1205,11 +1206,13 @@ def cleanup(self):
 
     def __tls_options(self):
         pki_dir = os.path.dirname(os.path.abspath(__file__)) + "/pki"
-        return [
-            "--tls", "--tls-no-cn-verify", "--tls-key", pki_dir + "/key.pem",
-            "--tls-cert", pki_dir + "/cert.pem", "--tls-cacert",
-            pki_dir + "/cacert.pem"
+        output = [
+            "--tls-no-cn-verify",
+            "--tls-key", pki_dir + "/key.pem",
+            "--tls-cert", pki_dir + "/cert.pem",
+            "--tls-cacert", pki_dir + "/cacert.pem"
         ]
+        return output
 
     def __ddir(self):
         return os.path.join(self.__dump_path, "%d" % self.__iter)
@@ -1433,7 +1436,7 @@ def dump(self, action, opts=[]):
         os.mkdir(self.__ddir())
         os.chmod(self.__ddir(), 0o777)
 
-        a_opts = ["--tree", self.__test.getpid()]
+        a_opts = ["--tree", self.__test.getpid()] + self.__encrypt
         if self.__prev_dump_iter:
             a_opts += [
                 "--prev-images-dir",
@@ -1508,7 +1511,7 @@ def dump(self, action, opts=[]):
                 raise test_fail_exc("criu page-server exited with %d" % ret)
 
     def restore(self):
-        r_opts = []
+        r_opts = list(self.__encrypt)
         if self.__restore_sibling:
-            r_opts = ["--restore-sibling"]
+            r_opts += ["--restore-sibling"]
             self.__test.auto_reap = False
@@ -2165,7 +2168,7 @@ def run_test(self, name, desc, flavor):
               'sat', 'script', 'rpc', 'criu_config', 'lazy_pages', 'join_ns',
               'dedup', 'sbs', 'freezecg', 'user', 'dry_run', 'noauto_dedup',
               'remote_lazy_pages', 'show_stats', 'lazy_migrate', 'stream',
-              'tls', 'criu_bin', 'crit_bin', 'pre_dump_mode', 'mntns_compat_mode',
+              'tls', 'encrypt',
'criu_bin', 'crit_bin', 'pre_dump_mode', 'mntns_compat_mode', 'rootless', 'preload_libfault', 'mocked_cuda_checkpoint') arg = repr((name, desc, flavor, {d: self.__opts[d] for d in nd})) @@ -2850,6 +2853,7 @@ def get_cli_args(): help="simulate lazy migration", action='store_true') rp.add_argument("--tls", help="use TLS for migration", action='store_true') + rp.add_argument("-e", "--encrypt", help="encrypt images", action='store_true') rp.add_argument("--title", help="A test suite title", default="criu") rp.add_argument("--show-stats", help="Show criu statistics", From 2b63a4a23c5640cfc1cf3b6aed2a20dba0c1a500 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 9 Sep 2023 15:35:00 +0100 Subject: [PATCH 03/12] bpfmap: rename opts to bpfmap_opts 'opts' is defined in cr_options.h. This header will be included in a subsequent patch. We rename the local variable 'opts' to 'bpfmap_opts' to avoid variable shadowing. Signed-off-by: Radostin Stoyanov --- criu/bpfmap.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/criu/bpfmap.c b/criu/bpfmap.c index 25098368d3..f24491638f 100644 --- a/criu/bpfmap.c +++ b/criu/bpfmap.c @@ -70,7 +70,7 @@ int restore_bpfmap_data(int map_fd, uint32_t map_id, struct bpfmap_data_rst **bp void *keys = NULL; void *values = NULL; unsigned int count; - LIBBPF_OPTS(bpf_map_batch_opts, opts); + LIBBPF_OPTS(bpf_map_batch_opts, bpfmap_opts); for (map_data = bpf_hash_table[map_id & BPFMAP_DATA_HASH_MASK]; map_data != NULL; map_data = map_data->next) { if (map_data->bde->map_id == map_id) @@ -99,7 +99,7 @@ int restore_bpfmap_data(int map_fd, uint32_t map_id, struct bpfmap_data_rst **bp } memcpy(values, map_data->data + bde->keys_bytes, bde->values_bytes); - if (bpf_map_update_batch(map_fd, keys, values, &count, &opts)) { + if (bpf_map_update_batch(map_fd, keys, values, &count, &bpfmap_opts)) { pr_perror("Can't load key-value pairs to BPF map"); goto err; } @@ -153,7 +153,7 @@ int dump_one_bpfmap_data(BpfmapFileEntry *bpf, 
int lfd, const struct fd_parms *p void *keys = NULL, *values = NULL; void *in_batch = NULL, *out_batch = NULL; BpfmapDataEntry bde = BPFMAP_DATA_ENTRY__INIT; - LIBBPF_OPTS(bpf_map_batch_opts, opts); + LIBBPF_OPTS(bpf_map_batch_opts, bpfmap_opts); int ret; key_size = bpf->key_size; @@ -179,7 +179,7 @@ int dump_one_bpfmap_data(BpfmapFileEntry *bpf, int lfd, const struct fd_parms *p goto err; } - ret = bpf_map_lookup_batch(lfd, in_batch, out_batch, keys, values, &count, &opts); + ret = bpf_map_lookup_batch(lfd, in_batch, out_batch, keys, values, &count, &bpfmap_opts); if (ret && errno != ENOENT) { pr_perror("Can't perform a batch lookup on BPF map"); goto err; From bdca0e2f1c67251eec26c447ab1266b14135fd7e Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 9 Sep 2023 17:22:50 +0100 Subject: [PATCH 04/12] bpfmap: optimize dump of keys/values We calculate the total memory size needed for both keys and values and allocate a single contiguous memory region using a single mmap call. In a subsequent patch, this change would enable encrypting the combined memory region using a single pair of ChaCha20-Poly1305 tag and nonce. 
Signed-off-by: Radostin Stoyanov --- criu/bpfmap.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/criu/bpfmap.c b/criu/bpfmap.c index f24491638f..861d33dd92 100644 --- a/criu/bpfmap.c +++ b/criu/bpfmap.c @@ -149,8 +149,8 @@ int dump_one_bpfmap_data(BpfmapFileEntry *bpf, int lfd, const struct fd_parms *p */ struct cr_img *img; - uint32_t key_size, value_size, max_entries, count; - void *keys = NULL, *values = NULL; + uint32_t key_size, value_size, total_size, max_entries, count; + void *keys = NULL, *values = NULL, *map_memory = NULL; void *in_batch = NULL, *out_batch = NULL; BpfmapDataEntry bde = BPFMAP_DATA_ENTRY__INIT; LIBBPF_OPTS(bpf_map_batch_opts, bpfmap_opts); @@ -161,17 +161,16 @@ int dump_one_bpfmap_data(BpfmapFileEntry *bpf, int lfd, const struct fd_parms *p max_entries = bpf->max_entries; count = max_entries; - keys = mmap(NULL, key_size * max_entries, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0); - if (keys == MAP_FAILED) { - pr_perror("Can't map memory for BPF map keys"); + /* To enable in-place encryption, we use single memory map for both keys and values */ + total_size = (key_size + value_size) * max_entries; + map_memory = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0); + if (map_memory == MAP_FAILED) { + pr_perror("Can't map memory for BPF map keys and values"); goto err; } - values = mmap(NULL, value_size * max_entries, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0); - if (values == MAP_FAILED) { - pr_perror("Can't map memory for BPF map values"); - goto err; - } + keys = map_memory; + values = map_memory + (key_size * max_entries); out_batch = mmap(NULL, key_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, 0, 0); if (out_batch == MAP_FAILED) { @@ -195,23 +194,17 @@ int dump_one_bpfmap_data(BpfmapFileEntry *bpf, int lfd, const struct fd_parms *p if (pb_write_one(img, &bde, PB_BPFMAP_DATA)) goto err; - if (write(img_raw_fd(img), 
keys, key_size * count) != (key_size * count)) { - pr_perror("Can't write BPF map's keys"); - goto err; - } - if (write(img_raw_fd(img), values, value_size * count) != (value_size * count)) { - pr_perror("Can't write BPF map's values"); + if (write(img_raw_fd(img), map_memory, total_size) != total_size) { + pr_perror("Can't write BPF map's keys and values"); goto err; } - munmap(keys, key_size * max_entries); - munmap(values, value_size * max_entries); + munmap(map_memory, total_size); munmap(out_batch, key_size); return 0; err: - munmap(keys, key_size * max_entries); - munmap(values, value_size * max_entries); + munmap(map_memory, total_size); munmap(out_batch, key_size); return -1; } From 24ed7e38743113c9f7cc1d835ffecaa684f6be2f Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sat, 9 Sep 2023 17:40:46 +0100 Subject: [PATCH 05/12] bpfmap: enable encryption of key/value data This patch extends dump_one_bpfmap_data() with support for encryption. Signed-off-by: Radostin Stoyanov --- criu/bpfmap.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/criu/bpfmap.c b/criu/bpfmap.c index 861d33dd92..7ff370c46d 100644 --- a/criu/bpfmap.c +++ b/criu/bpfmap.c @@ -2,11 +2,13 @@ #include #include "common/compiler.h" +#include "cr_options.h" #include "imgset.h" #include "bpfmap.h" #include "fdinfo.h" #include "image.h" #include "util.h" +#include "tls.h" #include "log.h" #include "protobuf.h" @@ -155,6 +157,7 @@ int dump_one_bpfmap_data(BpfmapFileEntry *bpf, int lfd, const struct fd_parms *p BpfmapDataEntry bde = BPFMAP_DATA_ENTRY__INIT; LIBBPF_OPTS(bpf_map_batch_opts, bpfmap_opts); int ret; + chacha20_poly1305_t cipher_data; key_size = bpf->key_size; value_size = bpf->value_size; @@ -194,11 +197,31 @@ int dump_one_bpfmap_data(BpfmapFileEntry *bpf, int lfd, const struct fd_parms *p if (pb_write_one(img, &bde, PB_BPFMAP_DATA)) goto err; + if (opts.encrypt) { + /* Encrypt buffer data using ChaCha20-Poly1305 */ + if (tls_encrypt_data(map_memory, 
total_size, cipher_data.tag, cipher_data.nonce)) { + pr_err("Can't encrypt BPF map's keys and values\n"); + goto err; + } + } + if (write(img_raw_fd(img), map_memory, total_size) != total_size) { pr_perror("Can't write BPF map's keys and values"); goto err; } + if (opts.encrypt) { + /* Write ChaCha20-Poly1305 tag data */ + if (write(img_raw_fd(img), cipher_data.tag, sizeof(cipher_data.tag)) != sizeof(cipher_data.tag)) { + pr_perror("Can't write BPF map's tag data"); + goto err; + } + if (write(img_raw_fd(img), cipher_data.nonce, sizeof(cipher_data.nonce)) != sizeof(cipher_data.nonce)) { + pr_perror("Can't write BPF map's nonce data"); + goto err; + } + } + munmap(map_memory, total_size); munmap(out_batch, key_size); return 0; From 775ead5512c8dc8797335221f9834ef350395959 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Fri, 10 Mar 2023 11:34:45 +0000 Subject: [PATCH 06/12] tls: enable ghost image and pipe data encryption During checkpoint, the contents of ghost images and pipe data is splice()-ed between file descriptors. To enable encryption for this data we introduce `tls_encrypt_file_data()` and `tls_decrypt_file_data()`. These functions read data from input file descriptor, perform encryption/decryption of the data, and write it to the corresponding output file descriptor. 
Signed-off-by: Radostin Stoyanov --- criu/files-reg.c | 18 +++- criu/include/tls.h | 6 ++ criu/pipes.c | 27 +++--- criu/tls.c | 216 ++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 252 insertions(+), 15 deletions(-) diff --git a/criu/files-reg.c b/criu/files-reg.c index fc61493501..a0223c28af 100644 --- a/criu/files-reg.c +++ b/criu/files-reg.c @@ -16,6 +16,7 @@ #include #include +#include "tls.h" #include "tty.h" #include "stats.h" @@ -375,8 +376,14 @@ static int mkreg_ghost(char *path, GhostFileEntry *gfe, struct cr_img *img) } ret = copy_file_from_chunks(img, gfd, gfe->size); - } else - ret = copy_file(img_raw_fd(img), gfd, 0); + } else { + if (opts.encrypt) { + ret = tls_decrypt_file_data(img_raw_fd(img), gfd, gfe->size); + } else { + ret = copy_file(img_raw_fd(img), gfd, 0); + } + } + if (ret < 0) unlink(path); close(gfd); @@ -981,6 +988,7 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de goto err_out; } + if (gfe.chunks) { if (opts.ghost_fiemap) { ret = copy_file_to_chunks_fiemap(fd, img, st->st_size); @@ -992,7 +1000,11 @@ static int dump_ghost_file(int _fd, u32 id, const struct stat *st, dev_t phys_de ret = copy_file_to_chunks(fd, img, st->st_size); } } else { - ret = copy_file(fd, img_raw_fd(img), st->st_size); + if (opts.encrypt) { + ret = tls_encrypt_file_data(fd, img_raw_fd(img), st->st_size); + } else { + ret = copy_file(fd, img_raw_fd(img), st->st_size); + } } close(fd); diff --git a/criu/include/tls.h b/criu/include/tls.h index 8afac963af..34833c9586 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -25,6 +25,9 @@ int tls_initialize_cipher(void); int tls_initialize_cipher_from_image(void); int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *nonce_data); int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *nonce_data); +int tls_encrypt_pipe_data(int fd_in, int fd_out, size_t data_size); +int tls_encrypt_file_data(int fd_in, int fd_out, size_t 
data_size);
+int tls_decrypt_file_data(int fd_in, int fd_out, size_t data_size);
 
 #else /* CONFIG_GNUTLS */
 
@@ -38,6 +41,9 @@ int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n
 #define tls_initialize_cipher_from_image() (0)
 #define tls_encrypt_data(data, data_size, tag_data, nonce_data) (-1)
 #define tls_decrypt_data(data, data_size, tag_data, nonce_data) (-1)
+#define tls_encrypt_pipe_data(fd_in, fd_out, data_size) (-1)
+#define tls_encrypt_file_data(fd_in, fd_out, data_size) (-1)
+#define tls_decrypt_file_data(fd_in, fd_out, data_size) (-1)
 #define write_img_cipher() (0)
 
 #endif /* CONFIG_HAS_GNUTLS */
diff --git a/criu/pipes.c b/criu/pipes.c
index daada88306..0397dfc44e 100644
--- a/criu/pipes.c
+++ b/criu/pipes.c
@@ -18,6 +18,8 @@
 #include "images/pipe-data.pb-c.h"
 #include "fcntl.h"
 #include "namespaces.h"
+#include "cr_options.h"
+#include "tls.h"
 
 static LIST_HEAD(pipes);
 
@@ -455,19 +457,22 @@ int dump_one_pipe_data(struct pipe_data_dump *pd, int lfd, const struct fd_parms
 	if (pb_write_one(img, &pde, PB_PIPE_DATA))
 		goto err_close;
 
-	while (bytes > 0) {
-		int wrote;
-		wrote = splice(steal_pipe[0], NULL, img_raw_fd(img), NULL, bytes, 0);
-		if (wrote < 0) {
-			pr_perror("Can't push pipe data");
-			goto err_close;
-		} else if (wrote == 0)
-			break;
-		bytes -= wrote;
+	if (opts.encrypt) {
+		ret = tls_encrypt_pipe_data(steal_pipe[0], img_raw_fd(img), bytes);
+	} else {
+		while (bytes > 0) {
+			int wrote;
+			wrote = splice(steal_pipe[0], NULL, img_raw_fd(img), NULL, bytes, 0);
+			if (wrote < 0) {
+				pr_perror("Can't push pipe data");
+				goto err_close;
+			} else if (wrote == 0)
+				break;
+			bytes -= wrote;
+		}
+		ret = 0;
 	}
 
-	ret = 0;
-
 err_close:
 	close(steal_pipe[0]);
 	close(steal_pipe[1]);
diff --git a/criu/tls.c b/criu/tls.c
index 1766ca5358..90da37526f 100644
--- a/criu/tls.c
+++ b/criu/tls.c
@@ -1,3 +1,4 @@
+#include
 #include
 #include
 #include
@@ -10,11 +11,13 @@
 #include
 #include
 
+#include "page.h"
 #include "imgset.h"
 #include "images/cipher.pb-c.h"
 #include "protobuf.h"
 #include "cr_options.h"
 #include "xmalloc.h"
+#include "tls.h"
 
 /* Compatibility with GnuTLS version < 3.5 */
 #ifndef GNUTLS_E_CERTIFICATE_VERIFICATION_ERROR
@@ -840,4 +843,215 @@ int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n
 	gnutls_aead_cipher_deinit(handle);
 
 	return ret;
-}
\ No newline at end of file
+}
+
+/**
+ * tls_encrypt_pipe_data reads data_size bytes from fd_in, encrypts them as a
+ * single ChaCha20-Poly1305 message, and writes the ciphertext followed by the
+ * tag and the nonce to fd_out. If data_size is zero, then the function
+ * returns immediately without reading or writing any data.
+ *
+ * Returns 0 on success and -1 on error, including the case where fd_in
+ * reaches EOF before data_size bytes were read.
+ */
+int tls_encrypt_pipe_data(int fd_in, int fd_out, size_t data_size)
+{
+	int exit_code = -1;
+	ssize_t ret;
+	size_t bytes_read = 0;
+	cleanup_free uint8_t *buf = NULL;
+	chacha20_poly1305_t cipher_data;
+
+	if (data_size == 0)
+		return 0;
+
+	buf = xmalloc(data_size);
+	if (!buf)
+		return -1;
+
+	while (bytes_read < data_size) {
+		ret = read(fd_in, buf + bytes_read, data_size - bytes_read);
+		if (ret < 0) {
+			pr_perror("Can't read file data");
+			goto err;
+		}
+		if (ret == 0)
+			break;
+		bytes_read += ret;
+	}
+
+	/*
+	 * buf comes from xmalloc() and is uninitialized past bytes_read, so
+	 * after a premature EOF its tail must not be encrypted and stored.
+	 */
+	if (bytes_read != data_size) {
+		pr_err("Short read of pipe data (%zu != %zu)\n", bytes_read, data_size);
+		goto err;
+	}
+
+	/* Encrypt buffer data using ChaCha20-Poly1305 */
+	if (tls_encrypt_data(buf, data_size, cipher_data.tag, cipher_data.nonce) < 0) {
+		pr_err("Failed to encrypt buffer data\n");
+		goto err;
+	}
+
+	/* Write ciphertext data */
+	ret = write(fd_out, buf, data_size);
+	if (ret < 0 || (size_t)ret != data_size) {
+		if (ret < 0)
+			pr_perror("Failed to write file data");
+		else
+			pr_err("Failed to write all data to image file (%zd != %zu)\n", ret, data_size);
+		goto err;
+	}
+
+	/* Write tag data */
+	if (write(fd_out, cipher_data.tag, sizeof(cipher_data.tag)) != sizeof(cipher_data.tag)) {
+		pr_err("Failed to write tag data to image file\n");
+		goto err;
+	}
+
+	/* Write nonce data */
+	if (write(fd_out, cipher_data.nonce, sizeof(cipher_data.nonce)) != sizeof(cipher_data.nonce)) {
+		pr_err("Failed to write nonce data to image file\n");
+		goto err;
+	}
+
+	exit_code = 0;
+err:
+	return exit_code;
+}
+
+/**
+ * tls_encrypt_file_data reads data from fd_in, encrypts the data, and writes
+ * the encrypted data to fd_out. The function reads all data from fd_in until EOF.
+ * If data_size is non-zero, then it checks that the total number of bytes read
+ * is equal to data_size.
+ *
+ * The input is processed in chunks of at most PAGE_SIZE bytes. Each chunk is
+ * written as: tag, nonce, chunk size (a native ssize_t), ciphertext.
+ */
+int tls_encrypt_file_data(int fd_in, int fd_out, size_t data_size)
+{
+	int exit_code = -1;
+	ssize_t ret, bytes_read = 0, total = 0;
+	uint8_t buf[PAGE_SIZE];
+	chacha20_poly1305_t cipher_data;
+
+	while (1) {
+		/* Fill the buffer completely unless EOF comes first. */
+		bytes_read = 0;
+		while ((size_t)bytes_read < sizeof(buf)) {
+			ret = read(fd_in, buf + bytes_read, sizeof(buf) - bytes_read);
+			if (ret < 0) {
+				pr_perror("Can't read file data");
+				goto err;
+			}
+			if (ret == 0)
+				break; /* EOF */
+
+			bytes_read += ret;
+		}
+
+		if (bytes_read == 0)
+			break; /* EOF */
+
+		/* Encrypt buffer data using ChaCha20-Poly1305 */
+		if (tls_encrypt_data(buf, bytes_read, cipher_data.tag, cipher_data.nonce) < 0) {
+			pr_err("Failed to encrypt buffer data\n");
+			goto err;
+		}
+
+		/* Write tag data */
+		if (write(fd_out, cipher_data.tag, sizeof(cipher_data.tag)) != sizeof(cipher_data.tag)) {
+			pr_err("Failed to write tag data to image file\n");
+			goto err;
+		}
+
+		/* Write nonce data */
+		if (write(fd_out, cipher_data.nonce, sizeof(cipher_data.nonce)) != sizeof(cipher_data.nonce)) {
+			pr_err("Failed to write nonce data to image file\n");
+			goto err;
+		}
+
+		/* Write size of data */
+		if (write(fd_out, &bytes_read, sizeof(bytes_read)) != sizeof(bytes_read)) {
+			pr_err("Failed to write size of data to image file\n");
+			goto err;
+		}
+
+		/* Write ciphertext data */
+		ret = write(fd_out, buf, bytes_read);
+		if (ret != bytes_read) {
+			if (ret < 0)
+				pr_perror("Failed to write file data");
+			else
+				pr_err("Failed to write all data to image file (%zd != %zd)\n", ret, bytes_read);
+			goto err;
+		}
+		total += ret;
+	}
+
+	if (data_size && (size_t)total != data_size) {
+		pr_err("File size mismatch (%zd != %zu)\n", total, data_size);
+		goto err;
+	}
+
+	exit_code = 0;
+err:
+	return exit_code;
+}
+
+/*
+ * tls_read_full reads up to len bytes from fd into buf, retrying after short
+ * reads. It returns the number of bytes read, which is smaller than len only
+ * when EOF was reached, or -1 (after logging errno) on a read error.
+ */
+static ssize_t tls_read_full(int fd, void *buf, size_t len)
+{
+	size_t done = 0;
+
+	while (done < len) {
+		ssize_t ret = read(fd, (uint8_t *)buf + done, len - done);
+
+		if (ret < 0) {
+			pr_perror("Can't read encrypted file data");
+			return -1;
+		}
+		if (ret == 0)
+			break; /* EOF */
+		done += ret;
+	}
+
+	return done;
+}
+
+/**
+ * tls_decrypt_file_data reads encrypted data from fd_in, decrypts the data,
+ * and writes the decrypted data to fd_out. The function reads all data from
+ * fd_in until EOF and if data_size is non-zero, then it checks that the
+ * total number of bytes written is equal to data_size.
+ *
+ * Each chunk is expected as: tag, nonce, chunk size (a native ssize_t),
+ * ciphertext. A chunk size that is not positive or does not fit in the
+ * PAGE_SIZE buffer is rejected.
+ *
+ * Returns 0 on success and -1 on error.
+ */
+int tls_decrypt_file_data(int fd_in, int fd_out, size_t data_size)
+{
+	chacha20_poly1305_t cipher_data;
+	ssize_t ret, bytes_read = 0, total = 0;
+	int exit_code = -1;
+	uint8_t buf[PAGE_SIZE];
+
+	while (1) {
+		/* Read tag data */
+		ret = tls_read_full(fd_in, cipher_data.tag, sizeof(cipher_data.tag));
+		if (ret == 0)
+			break; /* EOF */
+		if (ret != (ssize_t)sizeof(cipher_data.tag)) {
+			pr_err("Failed to read tag data (%zd != %zu)\n", ret, sizeof(cipher_data.tag));
+			goto err;
+		}
+
+		/* Read nonce data */
+		ret = tls_read_full(fd_in, cipher_data.nonce, sizeof(cipher_data.nonce));
+		if (ret != (ssize_t)sizeof(cipher_data.nonce)) {
+			pr_err("Failed to read nonce data (%zd != %zu)\n", ret, sizeof(cipher_data.nonce));
+			goto err;
+		}
+
+		/* Read data size */
+		ret = tls_read_full(fd_in, &bytes_read, sizeof(bytes_read));
+		if (ret != (ssize_t)sizeof(bytes_read)) {
+			pr_err("Failed to read data size (%zd != %zu)\n", ret, sizeof(bytes_read));
+			goto err;
+		}
+
+		/*
+		 * The chunk size is taken from the image before anything has
+		 * been authenticated, so it must be validated against buf
+		 * before it is used as a read length.
+		 */
+		if (bytes_read <= 0 || (size_t)bytes_read > sizeof(buf)) {
+			pr_err("Invalid encrypted chunk size (%zd)\n", bytes_read);
+			goto err;
+		}
+
+		ret = tls_read_full(fd_in, buf, bytes_read);
+		if (ret != bytes_read) {
+			pr_err("Failed to read file data (%zd != %zd)\n", ret, bytes_read);
+			goto err;
+		}
+
+		/* Decrypt buffer data using ChaCha20-Poly1305 */
+		if (tls_decrypt_data(buf, bytes_read, cipher_data.tag, cipher_data.nonce) < 0) {
+			pr_err("Failed to decrypt buffer data\n");
+			goto err;
+		}
+
+		/* Write plaintext data */
+		ret = write(fd_out, buf, bytes_read);
+		if (ret != bytes_read) {
+			if (ret < 0)
+				pr_perror("Failed to write file data");
+			else
+				pr_err("Failed to write all data to file (%zd != %zd)\n", ret, bytes_read);
+			goto err;
+		}
+		total += ret;
+	}
+
+	if (data_size && (size_t)total != data_size) {
+		pr_err("File size mismatch (%zd != %zu)\n", total, data_size);
+		goto err;
+	}
+
+	exit_code = 0;
+err:
+	return exit_code;
+}
From 11eac8a04caf8c19091b6b26b7a2944bcc391a61 Mon Sep 17 00:00:00 2001
From: Radostin Stoyanov
Date: Fri, 17 Feb 2023 16:06:14 +0000
Subject: [PATCH 07/12] crit: add support for decoding encrypted images

This patch extends CRIT with the ability to decode encrypted images.
When `cipher.img` is present, crit will load the corresponding private key
(from /etc/pki/criu/private/key.pem), decrypt the cipher token and use it to
decrypt the protobuf entries in the image that is being decoded.

Signed-off-by: Radostin Stoyanov
---
 .cirrus.yml | 2 +-
 crit/crit/__main__.py | 109 +++++++++++++++++++----
 lib/pycriu/images/images.py | 108 +++++++++++++++-------
 scripts/build/Dockerfile.alpine | 2 +-
 scripts/build/Dockerfile.centos8 | 2 +-
 scripts/ci/prepare-for-fedora-rawhide.sh | 1 +
 scripts/ci/vagrant.sh | 2 +-
 7 files changed, 171 insertions(+), 55 deletions(-)

diff --git a/.cirrus.yml b/.cirrus.yml
index 5e30ca2c2b..dcd5cc72f3 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -36,7 +36,7 @@ task:
     ln -sf /usr/include/google/protobuf/descriptor.proto images/google/protobuf/descriptor.proto
     dnf config-manager --set-enabled crb # Same as CentOS 8 powertools
     dnf -y install epel-release epel-next-release
-    dnf -y install --allowerasing asciidoc gcc git gnutls-devel libaio-devel libasan libcap-devel libnet-devel libnl3-devel libbsd-devel libselinux-devel make protobuf-c-devel protobuf-devel python-devel python-PyYAML python-protobuf python-junit_xml python3-importlib-metadata xmlto libdrm-devel
+    dnf -y install --allowerasing asciidoc gcc git gnutls-devel libaio-devel libasan libcap-devel libnet-devel libnl3-devel libbsd-devel libselinux-devel make protobuf-c-devel protobuf-devel python-devel python-PyYAML python-protobuf python-junit_xml python3-importlib-metadata python3-cryptography xmlto libdrm-devel
     # The image has a too old version of nettle which does not work with gnutls.
     # Just upgrade to the latest to make the error go away.
dnf -y upgrade nettle nettle-devel diff --git a/crit/crit/__main__.py b/crit/crit/__main__.py index bce5234456..f2c1d37b9a 100755 --- a/crit/crit/__main__.py +++ b/crit/crit/__main__.py @@ -3,11 +3,19 @@ import sys import json import os +import base64 + +from cryptography.hazmat.primitives.asymmetric import rsa, padding +from cryptography.hazmat.primitives import serialization +from cryptography.hazmat.backends import default_backend import pycriu from . import __version__ +CRIU_KEY = '/etc/pki/criu/private/key.pem' + + def inf(opts): if opts['in']: return open(opts['in'], 'rb') @@ -36,11 +44,56 @@ def dinf(opts, name): return open(os.path.join(opts['dir'], name), mode='rb') +def get_cipher_token(opts): + """ + get_cipher_token returns the decrypted cipher token. + """ + plaintext_token = None + if 'in' in opts: + dir_path = os.path.dirname(os.path.realpath(opts['in'])) + elif 'dir' in opts: + dir_path = os.path.realpath(opts['dir']) + else: + raise TypeError("Invalid input") + cipher_img_path = os.path.join(dir_path, 'cipher.img') + + # We assume that when this image is not present, + # the checkpoint images are not encrypted. Thus, + # here we continue with normal decode if 'cipher.img' + # doesn't exist. + if not os.path.exists(cipher_img_path): + return None + + with open(cipher_img_path, mode='rb') as cipher_img_file: + cipher_img = pycriu.images.load(cipher_img_file) + # Validate the content of the cipher.img + if ('entries' not in cipher_img or + len(cipher_img['entries']) != 1 or + 'token' not in cipher_img['entries'][0]): + raise TypeError("Invalid cipher image") + + encrypted_token = base64.b64decode(cipher_img['entries'][0]['token']) + + priv_key_file = opts['tls_key'] + with open(priv_key_file, "rb") as f: + priv_key = serialization.load_pem_private_key(f.read(), None, default_backend()) + if not isinstance(priv_key, rsa.RSAPrivateKey): + raise TypeError("Only RSA private keys are supported.") + + # GnuTLS uses the PKCS#1 v1.5 padding scheme by default. 
+ plaintext_token = priv_key.decrypt(encrypted_token, padding.PKCS1v15()) + return plaintext_token + + def decode(opts): indent = None + token = None + + if opts['in'] and os.path.basename(opts['in']) not in ['cipher.img', 'stats-dump', 'stats-restore']: + token = get_cipher_token(opts) try: - img = pycriu.images.load(inf(opts), opts['pretty'], opts['nopl']) + img = pycriu.images.load(inf(opts), opts['pretty'], opts['nopl'], token=token) except pycriu.images.MagicException as exc: print("Unknown magic %#x.\n" "Maybe you are feeding me an image with " @@ -101,10 +154,11 @@ def show_ps(p, opts, depth=0): def explore_ps(opts): pss = {} - ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) + token = get_cipher_token(opts) + ps_img = pycriu.images.load(dinf(opts, 'pstree.img'), token=token) for p in ps_img['entries']: core = pycriu.images.load( - dinf(opts, 'core-%d.img' % get_task_id(p, 'pid'))) + dinf(opts, 'core-%d.img' % get_task_id(p, 'pid')), token=token) ps = ps_item(p, core['entries'][0]) pss[ps.pid] = ps @@ -128,10 +182,11 @@ def explore_ps(opts): def ftype_find_in_files(opts, ft, fid): global files_img + token = get_cipher_token(opts) if files_img is None: try: - files_img = pycriu.images.load(dinf(opts, "files.img"))['entries'] + files_img = pycriu.images.load(dinf(opts, "files.img"), token=token)['entries'] except Exception: files_img = [] @@ -146,6 +201,7 @@ def ftype_find_in_files(opts, ft, fid): def ftype_find_in_image(opts, ft, fid, img): + token = get_cipher_token(opts) f = ftype_find_in_files(opts, ft, fid) if f: if ft['field'] in f: @@ -154,7 +210,7 @@ def ftype_find_in_image(opts, ft, fid, img): return None if ft['img'] is None: - ft['img'] = pycriu.images.load(dinf(opts, img))['entries'] + ft['img'] = pycriu.images.load(dinf(opts, img), token=token)['entries'] for f in ft['img']: if f['id'] == fid: return f @@ -218,18 +274,19 @@ def get_file_str(opts, fd): def explore_fds(opts): - ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) + token = 
get_cipher_token(opts) + ps_img = pycriu.images.load(dinf(opts, 'pstree.img'), token=token) for p in ps_img['entries']: pid = get_task_id(p, 'pid') - idi = pycriu.images.load(dinf(opts, 'ids-%s.img' % pid)) + idi = pycriu.images.load(dinf(opts, 'ids-%s.img' % pid), token=token) fdt = idi['entries'][0]['files_id'] - fdi = pycriu.images.load(dinf(opts, 'fdinfo-%d.img' % fdt)) + fdi = pycriu.images.load(dinf(opts, 'fdinfo-%d.img' % fdt), token=token) print("%d" % pid) for fd in fdi['entries']: print("\t%7d: %s" % (fd['fd'], get_file_str(opts, fd))) - fdi = pycriu.images.load(dinf(opts, 'fs-%d.img' % pid))['entries'][0] + fdi = pycriu.images.load(dinf(opts, 'fs-%d.img' % pid), token=token)['entries'][0] print("\t%7s: %s" % ('cwd', get_file_str(opts, { 'type': 'REG', @@ -258,11 +315,12 @@ def get(self, iid): def explore_mems(opts): - ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) + token = get_cipher_token(opts) + ps_img = pycriu.images.load(dinf(opts, 'pstree.img'), token=token) vids = vma_id() for p in ps_img['entries']: pid = get_task_id(p, 'pid') - mmi = pycriu.images.load(dinf(opts, 'mm-%d.img' % pid))['entries'][0] + mmi = pycriu.images.load(dinf(opts, 'mm-%d.img' % pid), token=token)['entries'][0] print("%d" % pid) print("\t%-36s %s" % ('exe', @@ -311,12 +369,13 @@ def explore_mems(opts): def explore_rss(opts): - ps_img = pycriu.images.load(dinf(opts, 'pstree.img')) + token = get_cipher_token(opts) + ps_img = pycriu.images.load(dinf(opts, 'pstree.img'), token=token) for p in ps_img['entries']: pid = get_task_id(p, 'pid') - vmas = pycriu.images.load(dinf(opts, 'mm-%d.img' % - pid))['entries'][0]['vmas'] - pms = pycriu.images.load(dinf(opts, 'pagemap-%d.img' % pid))['entries'] + vmas = pycriu.images.load( + dinf(opts, 'mm-%d.img' % pid), token=token)['entries'][0]['vmas'] + pms = pycriu.images.load(dinf(opts, 'pagemap-%d.img' % pid), token=token)['entries'] print("%d" % pid) vmi = 0 @@ -385,6 +444,10 @@ def main(): '-o', '--out', help='where to put criu 
image in json format (stdout by default)') + decode_parser.add_argument( + '--tls-key', default=CRIU_KEY, + help=f'path to private key in PEM format used to decrypt images (default: {CRIU_KEY})' + ) decode_parser.set_defaults(func=decode, nopl=False) # Encode @@ -409,15 +472,25 @@ def main(): x_parser = subparsers.add_parser('x', help='explore image dir') x_parser.add_argument('dir') x_parser.add_argument('what', choices=['ps', 'fds', 'mems', 'rss']) + x_parser.add_argument( + '--tls-key', default=CRIU_KEY, + help=f'path to private key in PEM format used to decrypt images (default: {CRIU_KEY})' + ) x_parser.set_defaults(func=explore) # Show show_parser = subparsers.add_parser( 'show', help="convert criu image from binary to human-readable json") show_parser.add_argument("in") - show_parser.add_argument('--nopl', - help='do not show entry payload (if exists)', - action='store_true') + show_parser.add_argument( + '--nopl', + help='do not show entry payload (if exists)', + action='store_true' + ) + show_parser.add_argument( + '--tls-key', default=CRIU_KEY, + help=f'path to private key in PEM format used to decrypt images (default: {CRIU_KEY})' + ) show_parser.set_defaults(func=decode, pretty=True, out=None) opts = vars(parser.parse_args()) diff --git a/lib/pycriu/images/images.py b/lib/pycriu/images/images.py index c5bfb7988c..b56bc0396b 100644 --- a/lib/pycriu/images/images.py +++ b/lib/pycriu/images/images.py @@ -63,6 +63,39 @@ def round_up(x, y): return (((x - 1) | (y - 1)) + 1) +def read_entry_data(f, size, token): + """ + A helper function for reading data from images. 
+ """ + if size is None: + entry_data = f.read() + else: + entry_data = f.read(size) + + if token is not None: + from cryptography.hazmat.primitives.ciphers.aead import ChaCha20Poly1305 + + # Read the 128-bit tag and 96-bit nonce; a short read means a truncated image + if size is None: + if len(entry_data) <= 28: + raise IOError("Entry data is too short (%d bytes)" % len(entry_data)) + # Read nonce and tag data in reverse order from the end. + nonce_data = entry_data[-12:] + tag_data = entry_data[-28:-12] + entry_data = entry_data[:-28] + else: + tag_data = f.read(16) + if len(tag_data) != 16: + raise IOError("Can't read tag data") + nonce_data = f.read(12) + if len(nonce_data) != 12: + raise IOError("Can't read nonce data") + + cipher = ChaCha20Poly1305(token) + entry_data = cipher.decrypt(nonce_data, entry_data + tag_data, None) + return entry_data + + class MagicException(Exception): def __init__(self, magic): self.magic = magic @@ -93,7 +126,7 @@ def __init__(self, payload, extra_handler=None): self.payload = payload self.extra_handler = extra_handler - def load(self, f, pretty=False, no_payload=False): + def load(self, f, pretty=False, no_payload=False, token=None): """ Convert criu image entries from binary format to dict(json). Takes a file-like object and returns a list with entries in @@ -109,8 +142,10 @@ def load(self, f, pretty=False, no_payload=False): buf = f.read(4) if len(buf) == 0: break - size, = struct.unpack('i', buf) - pbuff.ParseFromString(f.read(size)) + + size, = struct.unpack('I', buf) + entry_data = read_entry_data(f, size, token) + pbuff.ParseFromString(entry_data) entry = pb2dict.pb2dict(pbuff, pretty) # Read extra @@ -198,7 +233,7 @@ class pagemap_handler: of pagemap_entry type. 
""" - def load(self, f, pretty=False, no_payload=False): + def load(self, f, pretty=False, no_payload=False, token=None): entries = [] pbuff = pb.pagemap_head() @@ -206,8 +241,10 @@ def load(self, f, pretty=False, no_payload=False): buf = f.read(4) if len(buf) == 0: break - size, = struct.unpack('i', buf) - pbuff.ParseFromString(f.read(size)) + size, = struct.unpack('I', buf) + + entry_data = read_entry_data(f, size, token) + pbuff.ParseFromString(entry_data) entries.append(pb2dict.pb2dict(pbuff, pretty)) pbuff = pb.pagemap_entry() @@ -240,13 +277,15 @@ def count(self, f): # Special handler for ghost-file.img class ghost_file_handler: - def load(self, f, pretty=False, no_payload=False): + def load(self, f, pretty=False, no_payload=False, token=None): entries = [] gf = pb.ghost_file_entry() buf = f.read(4) - size, = struct.unpack('i', buf) - gf.ParseFromString(f.read(size)) + size, = struct.unpack('I', buf) + + entry_data = read_entry_data(f, size, token) + gf.ParseFromString(entry_data) g_entry = pb2dict.pb2dict(gf, pretty) if gf.chunks: @@ -256,19 +295,24 @@ def load(self, f, pretty=False, no_payload=False): buf = f.read(4) if len(buf) == 0: break - size, = struct.unpack('i', buf) - gc.ParseFromString(f.read(size)) + size, = struct.unpack('I', buf) + + entry_data = read_entry_data(f, size, token) + gc.ParseFromString(entry_data) entry = pb2dict.pb2dict(gc, pretty) if no_payload: - f.seek(gc.len, os.SEEK_CUR) + # An encrypted chunk is followed by a 16-byte tag and a 12-byte nonce: skip them too + f.seek(gc.len + (28 if token is not None else 0), os.SEEK_CUR) else: - entry['extra'] = base64.encodebytes(f.read(gc.len)).decode('utf-8') + extra_data = read_entry_data(f, gc.len, token) + entry['extra'] = base64.encodebytes(extra_data).decode('utf-8') entries.append(entry) else: if no_payload: f.seek(0, os.SEEK_END) else: - g_entry['extra'] = base64.encodebytes(f.read()).decode('utf-8') + extra_data = read_entry_data(f, None, token) + g_entry['extra'] = base64.encodebytes(extra_data).decode('utf-8') entries.append(g_entry) return entries @@ -311,10 +355,9 @@ def dumps(self, entries): # do not store big amounts of binary 
data. They # are negligible comparing to pages size. class pipes_data_extra_handler: - def load(self, f, pload): - size = pload.bytes - data = f.read(size) - return base64.encodebytes(data).decode('utf-8') + def load(self, f, pload, token=None): + entry_data = read_entry_data(f, pload.bytes, token) + return base64.encodebytes(entry_data).decode('utf-8') def dump(self, extra, f, pload): data = decode_base64_data(extra) @@ -326,10 +368,9 @@ def skip(self, f, pload): class sk_queues_extra_handler: - def load(self, f, pload): - size = pload.length - data = f.read(size) - return base64.encodebytes(data).decode('utf-8') + def load(self, f, pload, token=None): + entry_data = read_entry_data(f, pload.length, token) + return base64.encodebytes(entry_data).decode('utf-8') def dump(self, extra, f, _unused): data = decode_base64_data(extra) @@ -341,11 +382,11 @@ def skip(self, f, pload): class tcp_stream_extra_handler: - def load(self, f, pbuff): + def load(self, f, pbuff, token=None): d = {} - inq = f.read(pbuff.inq_len) - outq = f.read(pbuff.outq_len) + inq = read_entry_data(f, pbuff.inq_len, token) + outq = read_entry_data(f, pbuff.outq_len, token) d['inq'] = base64.encodebytes(inq).decode('utf-8') d['outq'] = base64.encodebytes(outq).decode('utf-8') @@ -365,9 +406,9 @@ def skip(self, f, pbuff): class bpfmap_data_extra_handler: - def load(self, f, pload): + def load(self, f, pload, token=None): size = pload.keys_bytes + pload.values_bytes - data = f.read(size) + data = read_entry_data(f, size, token) return base64.encodebytes(data).decode('utf-8') def dump(self, extra, f, pload): @@ -380,7 +421,7 @@ def skip(self, f, pload): class ipc_sem_set_handler: - def load(self, f, pbuff): + def load(self, f, pbuff, token=None): entry = pb2dict.pb2dict(pbuff) size = sizeof_u16 * entry['nsems'] rounded = round_up(size, sizeof_u64) @@ -435,7 +476,7 @@ def skip(self, f, pbuff): _, pl_len = self._read_messages(f, pbuff, skip_data=True) return pl_len - def _read_messages(self, f, pbuff, 
skip_data=False): + def _read_messages(self, f, pbuff, skip_data=False, token=None): entry = pb2dict.pb2dict(pbuff) messages = [] pl_len = 0 @@ -445,7 +486,8 @@ def _read_messages(self, f, pbuff, skip_data=False): break size, = struct.unpack('i', buf) msg = pb.ipc_msg() - msg.ParseFromString(f.read(size)) + entry_data = read_entry_data(f, size, token) + msg.ParseFromString(entry_data) rounded = round_up(msg.msize, sizeof_u64) pl_len += size + msg.msize @@ -461,10 +503,10 @@ def _read_messages(self, f, pbuff, skip_data=False): class ipc_shm_handler: - def load(self, f, pbuff): + def load(self, f, pbuff, token=None): entry = pb2dict.pb2dict(pbuff) size = entry['size'] - data = f.read(size) + data = read_entry_data(f, size, token) rounded = round_up(size, sizeof_u32) f.seek(rounded - size, 1) return base64.encodebytes(data).decode('utf-8') @@ -579,7 +621,7 @@ def __rhandler(f): return m, handler -def load(f, pretty=False, no_payload=False): +def load(f, pretty=False, no_payload=False, token=None): """ Convert criu image from binary format to dict(json). Takes a file-like object to read criu image from. 
@@ -590,7 +632,7 @@ def load(f, pretty=False, no_payload=False): m, handler = __rhandler(f) image['magic'] = m - image['entries'] = handler.load(f, pretty, no_payload) + image['entries'] = handler.load(f, pretty, no_payload, token) return image diff --git a/scripts/build/Dockerfile.alpine b/scripts/build/Dockerfile.alpine index 329d7791de..4220d94bf6 100644 --- a/scripts/build/Dockerfile.alpine +++ b/scripts/build/Dockerfile.alpine @@ -47,6 +47,6 @@ RUN apk add \ # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 -D test -RUN pip3 install junit_xml --break-system-packages +RUN pip3 install --break-system-packages junit_xml cryptography RUN make -C test/zdtm diff --git a/scripts/build/Dockerfile.centos8 b/scripts/build/Dockerfile.centos8 index a672123441..2800aa7c2d 100644 --- a/scripts/build/Dockerfile.centos8 +++ b/scripts/build/Dockerfile.centos8 @@ -45,6 +45,6 @@ RUN make mrproper && date && make -j $(nproc) CC="$CC" && date # The rpc test cases are running as user #1000, let's add the user RUN adduser -u 1000 test -RUN pip3 install junit_xml +RUN pip3 install junit_xml cryptography RUN make -C test/zdtm -j $(nproc) diff --git a/scripts/ci/prepare-for-fedora-rawhide.sh b/scripts/ci/prepare-for-fedora-rawhide.sh index 09085c403b..f7117c346e 100755 --- a/scripts/ci/prepare-for-fedora-rawhide.sh +++ b/scripts/ci/prepare-for-fedora-rawhide.sh @@ -26,6 +26,7 @@ dnf install -y \ python3-PyYAML \ python3-protobuf \ python3-junit_xml \ + python3-cryptography \ python3-pip \ python3-importlib-metadata \ python-unversioned-command \ diff --git a/scripts/ci/vagrant.sh b/scripts/ci/vagrant.sh index 3904c51d22..023f8aaf48 100755 --- a/scripts/ci/vagrant.sh +++ b/scripts/ci/vagrant.sh @@ -39,7 +39,7 @@ setup() { ssh default sudo dnf install -y gcc git gnutls-devel nftables-devel libaio-devel \ libasan libcap-devel libnet-devel libnl3-devel libbsd-devel make protobuf-c-devel \ protobuf-devel python3-protobuf python3-importlib-metadata 
python3-junit_xml \ - rubygem-asciidoctor iptables libselinux-devel libbpf-devel python3-yaml + python3-cryptography rubygem-asciidoctor iptables libselinux-devel libbpf-devel python3-yaml # Disable sssd to avoid zdtm test failures in pty04 due to sssd socket ssh default sudo systemctl mask sssd ssh default cat /proc/cmdline From 8019ba696d76d560d758010b6dd7e0b8eef1dab7 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Dec 2024 13:26:44 +0000 Subject: [PATCH 08/12] cr_system: enable data encryption cr_system() and cr_system_userns() are used to run external executables such as tar, ip, and iptables. These external tools are used to create image files in 3rd party format (i.e., raw images). In order to encrypt the output of these tools, and to decrypt their input, we replace the corresponding input/output file descriptor with a pipe, and perform encryption/decryption of the data. Signed-off-by: Radostin Stoyanov --- criu/action-scripts.c | 2 +- criu/filesystems.c | 5 ++-- criu/include/tls.h | 10 ++++++++ criu/include/util.h | 10 ++++++-- criu/kerndat.c | 2 +- criu/net.c | 19 ++++++++------- criu/netfilter.c | 8 +++--- criu/tls.c | 57 +++++++++++++++++++++++++++++++++++++++++++ criu/unittest/mock.c | 10 ++++++++ criu/util.c | 54 +++++++++++++++++++++++++++++++++++++--- 10 files changed, 155 insertions(+), 22 deletions(-) diff --git a/criu/action-scripts.c b/criu/action-scripts.c index 6f79001864..89d75dade5 100644 --- a/criu/action-scripts.c +++ b/criu/action-scripts.c @@ -89,7 +89,7 @@ static int run_shell_scripts(const char *action) list_for_each_entry(script, &scripts, node) { int err; pr_debug("\t[%s]\n", script->path); - err = cr_system(-1, -1, -1, script->path, (char *[]){ script->path, NULL }, 0); + err = cr_system(-1, -1, -1, script->path, (char *[]){ script->path, NULL }, 0, TLS_MODE_NONE); if (err) pr_err("Script %s exited with %d\n", script->path, err); retval |= err; diff --git a/criu/filesystems.c b/criu/filesystems.c index 
093e1c4921..9183c5b7ff 100644 --- a/criu/filesystems.c +++ b/criu/filesystems.c @@ -19,6 +19,7 @@ #include "util.h" #include "fs-magic.h" #include "tty.h" +#include "tls.h" #include "images/mnt.pb-c.h" #include "images/binfmt-misc.pb-c.h" @@ -420,7 +421,7 @@ static int tmpfs_dump(struct mount_info *pm) (char *[]){ "tar", "--create", "--gzip", "--no-unquote", "--no-wildcards", "--one-file-system", "--check-links", "--preserve-permissions", "--sparse", "--numeric-owner", "--directory", "/proc/self/fd/0", ".", NULL }, - 0, userns_pid); + 0, userns_pid, TLS_MODE_ENCRYPT); if (ret) pr_err("Can't dump tmpfs content\n"); @@ -453,7 +454,7 @@ static int tmpfs_restore(struct mount_info *pm) ret = cr_system(img_raw_fd(img), -1, -1, "tar", (char *[]){ "tar", "--extract", "--gzip", "--no-unquote", "--no-wildcards", "--directory", service_mountpoint(pm), NULL }, - 0); + 0, TLS_MODE_DECRYPT); close_image(img); if (ret) { diff --git a/criu/include/tls.h b/criu/include/tls.h index 34833c9586..8c225b79dd 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -7,6 +7,12 @@ typedef struct { uint8_t nonce[12]; } chacha20_poly1305_t; +typedef struct { + chacha20_poly1305_t cipher_data; + ssize_t size; + char data[4096]; +} criu_datum_t; + #ifdef CONFIG_GNUTLS #include @@ -28,6 +34,8 @@ int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n int tls_encrypt_pipe_data(int fd_in, int fd_out, size_t data_size); int tls_encrypt_file_data(int fd_in, int fd_out, size_t data_size); int tls_decrypt_file_data(int fd_in, int fd_out, size_t data_size); +int tls_encryption_pipe(int output_file_fd, int pipe_read_fd); +int tls_decryption_pipe(int intput_file_fd, int pipe_write_fd); #else /* CONFIG_GNUTLS */ @@ -44,6 +52,8 @@ int tls_decrypt_file_data(int fd_in, int fd_out, size_t data_size); #define tls_encrypt_pipe_data(fd_in, fd_out, data_size) (-1) #define tls_encrypt_file_data(fd_in, fd_out, data_size) (-1) #define tls_decrypt_file_data(fd_in, fd_out, data_size) (-1) 
+#define tls_encryption_pipe(output_file_fd, pipe_read_fd) (-1) +#define tls_decryption_pipe(intput_file_fd, pipe_write_fd) (-1) #define write_img_cipher() (0) #endif /* CONFIG_HAS_GNUTLS */ diff --git a/criu/include/util.h b/criu/include/util.h index ae293a68c8..70624e91dc 100644 --- a/criu/include/util.h +++ b/criu/include/util.h @@ -164,8 +164,14 @@ extern int is_anon_link_type(char *link, char *type); #define CRS_CAN_FAIL 0x1 /* cmd can validly exit with non zero code */ -extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags); -extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid); +/* TLS modes for cr_system() */ +#define TLS_MODE_NONE 0 +#define TLS_MODE_ENCRYPT 1 +#define TLS_MODE_DECRYPT 2 + +extern int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int tls_mode); +extern int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid, + int tls_mode); extern pid_t fork_and_ptrace_attach(int (*child_setup)(void)); extern int cr_daemon(int nochdir, int noclose, int close_fd); extern int status_ready(void); diff --git a/criu/kerndat.c b/criu/kerndat.c index fa1ed21fad..b2320530a3 100644 --- a/criu/kerndat.c +++ b/criu/kerndat.c @@ -655,7 +655,7 @@ static int kerndat_iptables_has_xtlocks(void) } kdat.has_xtlocks = 1; - if (cr_system(fd, fd, fd, "sh", argv, CRS_CAN_FAIL) == -1) + if (cr_system(fd, fd, fd, "sh", argv, CRS_CAN_FAIL, TLS_MODE_NONE) == -1) kdat.has_xtlocks = 0; close_safe(&fd); diff --git a/criu/net.c b/criu/net.c index 4d8734fdfe..6ad7389e39 100644 --- a/criu/net.c +++ b/criu/net.c @@ -1971,7 +1971,8 @@ static int run_ip_tool(char *arg1, char *arg2, char *arg3, char *arg4, int fdin, if (!ip_tool_cmd) ip_tool_cmd = "ip"; - ret = cr_system(fdin, fdout, -1, ip_tool_cmd, (char *[]){ "ip", arg1, arg2, arg3, arg4, NULL }, flags); + ret = cr_system(fdin, fdout, -1, 
ip_tool_cmd, (char *[]){ "ip", arg1, arg2, arg3, arg4, NULL }, flags, + TLS_MODE_NONE); if (ret) { if (!(flags & CRS_CAN_FAIL)) pr_err("IP tool failed on %s %s %s %s\n", arg1, arg2, arg3 ?: "", arg4 ?: ""); @@ -1981,7 +1982,7 @@ static int run_ip_tool(char *arg1, char *arg2, char *arg3, char *arg4, int fdin, return 0; } -static int run_iptables_tool(char *def_cmd, int fdin, int fdout) +static int run_iptables_tool(char *def_cmd, int fdin, int fdout, int tls_mode) { int ret; char *cmd; @@ -1990,7 +1991,7 @@ static int run_iptables_tool(char *def_cmd, int fdin, int fdout) if (!cmd) cmd = def_cmd; pr_debug("\tRunning %s for %s\n", cmd, def_cmd); - ret = cr_system(fdin, fdout, -1, "sh", (char *[]){ "sh", "-c", cmd, NULL }, 0); + ret = cr_system(fdin, fdout, -1, "sh", (char *[]){ "sh", "-c", cmd, NULL }, 0, tls_mode); if (ret) pr_err("%s failed\n", def_cmd); @@ -2064,7 +2065,7 @@ static inline int dump_iptables(struct cr_imgset *fds) pr_info("skipping iptables dump - no legacy version present\n"); } else { img = img_from_set(fds, CR_FD_IPTABLES); - if (run_iptables_tool(iptables_cmd, -1, img_raw_fd(img))) + if (run_iptables_tool(iptables_cmd, -1, img_raw_fd(img), TLS_MODE_ENCRYPT)) return -1; } @@ -2075,7 +2076,7 @@ static inline int dump_iptables(struct cr_imgset *fds) pr_info("skipping ip6tables dump - no legacy version present\n"); } else { img = img_from_set(fds, CR_FD_IP6TABLES); - if (run_iptables_tool(ip6tables_cmd, -1, img_raw_fd(img))) + if (run_iptables_tool(ip6tables_cmd, -1, img_raw_fd(img), TLS_MODE_ENCRYPT)) return -1; } @@ -2409,7 +2410,7 @@ static inline int restore_iptables(int pid) return -1; } - ret = run_iptables_tool(comm, img_raw_fd(img), -1); + ret = run_iptables_tool(comm, img_raw_fd(img), -1, TLS_MODE_DECRYPT); close_image(img); if (ret) return ret; @@ -2432,7 +2433,7 @@ static inline int restore_iptables(int pid) return -1; } - ret = run_iptables_tool(comm, img_raw_fd(img), -1); + ret = run_iptables_tool(comm, img_raw_fd(img), -1, 
TLS_MODE_DECRYPT); out: close_image(img); @@ -3067,7 +3068,7 @@ static int iptables_restore(bool ipv6, char *buf, int size) } close_safe(&pfd[1]); - ret = cr_system(pfd[0], -1, -1, cmd[0], cmd, 0); + ret = cr_system(pfd[0], -1, -1, cmd[0], cmd, 0, TLS_MODE_NONE); err: close_safe(&pfd[1]); close_safe(&pfd[0]); @@ -3210,7 +3211,7 @@ static bool iptables_has_criu_jump_target(void) pr_perror("failed to open /dev/null, using log fd"); } - ret = cr_system(fd, fd, fd, "sh", argv, CRS_CAN_FAIL); + ret = cr_system(fd, fd, fd, "sh", argv, CRS_CAN_FAIL, TLS_MODE_NONE); close_safe(&fd); return !ret; } diff --git a/criu/netfilter.c b/criu/netfilter.c index 9e78dc4b03..5fb36d4745 100644 --- a/criu/netfilter.c +++ b/criu/netfilter.c @@ -48,8 +48,10 @@ void preload_netfilter_modules(void) fd = -1; pr_perror("failed to open /dev/null, using log fd for net module preload"); } - cr_system(fd, fd, fd, iptable_cmd_ipv4, (char *[]){ iptable_cmd_ipv4, "-L", "-n", NULL }, CRS_CAN_FAIL); - cr_system(fd, fd, fd, iptable_cmd_ipv6, (char *[]){ iptable_cmd_ipv6, "-L", "-n", NULL }, CRS_CAN_FAIL); + cr_system(fd, fd, fd, iptable_cmd_ipv4, (char *[]){ iptable_cmd_ipv4, "-L", "-n", NULL }, CRS_CAN_FAIL, + TLS_MODE_NONE); + cr_system(fd, fd, fd, iptable_cmd_ipv6, (char *[]){ iptable_cmd_ipv6, "-L", "-n", NULL }, CRS_CAN_FAIL, + TLS_MODE_NONE); close_safe(&fd); } @@ -100,7 +102,7 @@ static int iptables_connection_switch_raw(int family, u32 *src_addr, u16 src_por * cr_system is used here, because it blocks SIGCHLD before waiting * a child and the child can't be waited from SIGCHLD handler. 
*/ - ret = cr_system(-1, -1, -1, "sh", argv, 0); + ret = cr_system(-1, -1, -1, "sh", argv, 0, TLS_MODE_NONE); if (ret < 0 || !WIFEXITED(ret) || WEXITSTATUS(ret)) { pr_err("Iptables configuration failed\n"); return -1; diff --git a/criu/tls.c b/criu/tls.c index 90da37526f..a95a58d967 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -1055,3 +1055,91 @@ int tls_decrypt_file_data(int fd_in, int fd_out, size_t data_size) err: return exit_code; } + +/* + * Read plaintext from pipe_read_fd in chunks, encrypt each chunk and + * write it to output_fd as a fixed-size criu_datum_t record. + * Returns 0 once the pipe reaches EOF, -1 on error. + */ +int tls_encryption_pipe(int output_fd, int pipe_read_fd) +{ + while (1) { + /* + * The whole record is written to output_fd, so zero it first: + * the unused tail of data[] must not carry stale stack + * contents into the output. + */ + criu_datum_t datum = { 0 }; + + ssize_t ret = read(pipe_read_fd, datum.data, sizeof(datum.data)); + if (ret < 0) { + pr_perror("Failed to read data from pipe"); + return -1; + } + + if (ret == 0) + break; /* EOF */ + + datum.size = ret; + + if (tls_encrypt_data(datum.data, datum.size, datum.cipher_data.tag, datum.cipher_data.nonce) < 0) { + pr_err("Failed to encrypt buffer data\n"); + return -1; + } + + if (write(output_fd, &datum, sizeof(datum)) != sizeof(datum)) { + pr_perror("Failed to write data packet"); + return -1; + } + } + + return 0; +} + +/* + * Read criu_datum_t records from input_fd, decrypt each one and write + * the plaintext to pipe_write_fd. Returns 0 on EOF, -1 on error. + */ +int tls_decryption_pipe(int input_fd, int pipe_write_fd) +{ + while (1) { + criu_datum_t datum; + ssize_t written = 0; + + ssize_t ret = read(input_fd, &datum, sizeof(datum)); + if (ret == 0) + break; /* EOF */ + + if (ret < 0) { + pr_perror("Failed to read data packet"); + return -1; + } + + if (ret != sizeof(datum)) { + pr_err("Failed to read metadata: %zd != %zu\n", ret, sizeof(datum)); + return -1; + } + + /* The size field comes from the input file: keep it within data[]. */ + if (datum.size <= 0 || (size_t)datum.size > sizeof(datum.data)) { + pr_err("Invalid data packet size: %zd\n", datum.size); + return -1; + } + + if (tls_decrypt_data(datum.data, datum.size, datum.cipher_data.tag, datum.cipher_data.nonce) < 0) { + pr_err("Failed to decrypt buffer data\n"); + return -1; + } + + /* write() may be partial: loop until the whole chunk is out. */ + while (written < datum.size) { + ret = write(pipe_write_fd, datum.data + written, datum.size - written); + if (ret < 0) { + pr_perror("Failed to write decrypted data"); + return -1; + } + written += ret; + } + } + return 0; +} diff --git a/criu/unittest/mock.c b/criu/unittest/mock.c index e517720e42..d344859ea2 100644 --- a/criu/unittest/mock.c +++ b/criu/unittest/mock.c @@ -142,3 +142,13 @@ int check_mount_v2(void) } uint64_t compel_run_id; + +int tls_encryption_pipe(int output_fd, int pipe_read_fd) +{ + return -1; +} + +int tls_decryption_pipe(int 
input_fd, int pipe_write_fd) +{ + return -1; +} diff --git a/criu/util.c b/criu/util.c index d2bc9a8657..fcd9c7ccf8 100644 --- a/criu/util.c +++ b/criu/util.c @@ -40,6 +40,7 @@ #include "namespaces.h" #include "criu-log.h" #include "util-caps.h" +#include "tls.h" #include "clone-noasan.h" #include "cr_options.h" @@ -513,9 +514,9 @@ int is_anon_link_type(char *link, char *type) * If "in" is negative, stdin will be closed. * If "out" or "err" are negative, a log file descriptor will be used. */ -int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags) +int cr_system(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int tls_mode) { - return cr_system_userns(in, out, err, cmd, argv, flags, -1); + return cr_system_userns(in, out, err, cmd, argv, flags, -1, tls_mode); } int cr_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags) @@ -565,10 +566,15 @@ int close_fds(int minfd) return 0; } -int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid) +#define READ_END 0 +#define WRITE_END 1 + +int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], unsigned flags, int userns_pid, + int tls_mode) { sigset_t blockmask, oldmask; int ret = -1, status; + int pipe_fds[2]; pid_t pid; sigemptyset(&blockmask); @@ -578,6 +584,18 @@ int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], un return -1; } + /** + * To enable encryption/decryption with cr_system_userns(), + * we set the stdin or stdout FD to a PIPE that is used to + * encrypt or decrypt the data. 
+ */ + if (opts.encrypt && tls_mode != TLS_MODE_NONE) { + if (pipe(pipe_fds)) { + pr_perror("Failed to create pipe"); + return -1; + } + } + pid = fork(); if (pid == -1) { pr_perror("fork() failed"); @@ -598,6 +616,16 @@ int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], un } } + if (opts.encrypt) { + if (tls_mode == TLS_MODE_ENCRYPT) { + close(pipe_fds[READ_END]); + out = pipe_fds[WRITE_END]; + } else if (tls_mode == TLS_MODE_DECRYPT) { + close(pipe_fds[WRITE_END]); + in = pipe_fds[READ_END]; + } + } + if (out < 0) out = DUP_SAFE(log_get_fd(), out_chld); if (err < 0) @@ -643,6 +671,24 @@ int cr_system_userns(int in, int out, int err, char *cmd, char *const argv[], un _exit(1); } + if (opts.encrypt) { + if (tls_mode == TLS_MODE_ENCRYPT) { + close(pipe_fds[WRITE_END]); + if (tls_encryption_pipe(out, pipe_fds[READ_END]) < 0) { + pr_err("Failed to encrypt data to pipe\n"); + goto out; + } + close(pipe_fds[READ_END]); + } else if (tls_mode == TLS_MODE_DECRYPT) { + close(pipe_fds[READ_END]); + if (tls_decryption_pipe(in, pipe_fds[WRITE_END]) < 0) { + pr_err("Failed to decrypt data from pipe\n"); + goto out; + } + close(pipe_fds[WRITE_END]); + } + } + while (1) { ret = waitpid(pid, &status, 0); if (ret == -1) { @@ -1716,7 +1762,7 @@ static int is_iptables_nft(char *bin) goto err; } - ret = cr_system(-1, pfd[1], -1, cmd[0], cmd, CRS_CAN_FAIL); + ret = cr_system(-1, pfd[1], -1, cmd[0], cmd, CRS_CAN_FAIL, TLS_MODE_NONE); if (ret) { pr_err("%s -V failed\n", cmd[0]); goto err; From 9485f49fab2899b763a54d524e182b6a2a347a23 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Dec 2024 13:31:11 +0000 Subject: [PATCH 09/12] tls: enable support for encryption of memory pages We use the AES-XTS block cipher to encrypt memory pages as it is designed to encrypt blocks of data with fixed-size (e.g. 
memory pages), allows the use of hardware acceleration available in modern CPUs, and uses a single initialization vector (IV), instead of a per-page nonce, to ensure that encrypting the same plaintext with the same key results in different ciphertexts. In particular, XTS uses two 256-bit AES keys. One key is used to perform block encryption, and the other is used to encrypt a so-called "tweak value". The encrypted tweak value is further modified (with a Galois polynomial function) and XOR-ed with both the plaintext and ciphertext of each block. This method ensures that encrypting multiple blocks with identical data will produce different ciphertext. Since CRIU restores memory pages in the restorer context, this PIE code cannot be linked with libraries such as GnuTLS to perform decryption. Instead, we introduce a helper process to decrypt memory page data. The restorer context communicates with this helper process using pipes. It sends the function arguments to be used by preadv() and receives back its return value. The decrypted data is transferred to the target address space with process_vm_writev(). 
Suggested-by: Daiki Ueno Signed-off-by: Radostin Stoyanov --- criu/include/restorer.h | 4 + criu/include/tls.h | 6 + criu/mem.c | 40 ++++++ criu/page-xfer.c | 55 ++++++-- criu/pagemap.c | 24 ++++ criu/pie/restorer.c | 90 ++++++++++++- criu/tls.c | 282 +++++++++++++++++++++++++++++++++++++++- images/cipher.proto | 2 + 8 files changed, 489 insertions(+), 14 deletions(-) diff --git a/criu/include/restorer.h b/criu/include/restorer.h index 3fb5322a4b..86f5c6a667 100644 --- a/criu/include/restorer.h +++ b/criu/include/restorer.h @@ -244,6 +244,10 @@ struct task_restore_args { */ struct rst_rseq_param libc_rseq; + bool encrypted_pages; + int decryption_pipe_fd_r; + int decryption_pipe_fd_w; + uid_t uid; u32 cap_eff[CR_CAP_SIZE]; diff --git a/criu/include/tls.h b/criu/include/tls.h index 8c225b79dd..3c359fbc00 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -36,6 +36,9 @@ int tls_encrypt_file_data(int fd_in, int fd_out, size_t data_size); int tls_decrypt_file_data(int fd_in, int fd_out, size_t data_size); int tls_encryption_pipe(int output_file_fd, int pipe_read_fd); int tls_decryption_pipe(int intput_file_fd, int pipe_write_fd); +int tls_block_cipher_encrypt_data(void *ptext, size_t ptext_len); +int tls_block_cipher_decrypt_data(void *ctext, size_t ctext_len); +int tls_vma_io_pipe(int pages_img_fd, int pipe_fds[2][2]); #else /* CONFIG_GNUTLS */ @@ -54,6 +57,9 @@ int tls_decryption_pipe(int intput_file_fd, int pipe_write_fd); #define tls_decrypt_file_data(fd_in, fd_out, data_size) (-1) #define tls_encryption_pipe(output_file_fd, pipe_read_fd) (-1) #define tls_decryption_pipe(intput_file_fd, pipe_write_fd) (-1) +#define tls_block_cipher_encrypt_data(ptext, ptext_len) (-1) +#define tls_block_cipher_decrypt_data(ctext, ctext_len) (-1) +#define tls_vma_io_pipe(pages_img_fd, pipe_fds) (-1) #define write_img_cipher() (0) #endif /* CONFIG_HAS_GNUTLS */ diff --git a/criu/mem.c b/criu/mem.c index c9578ef441..0aa220c755 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -31,6 
+31,7 @@ #include "prctl.h" #include "compel/infect-util.h" #include "pidfd-store.h" +#include "tls.h" #include "protobuf.h" #include "images/pagemap.pb-c.h" @@ -1442,6 +1443,7 @@ int open_vmas(struct pstree_item *t) static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta) { struct cr_img *pages; + int pipe_fds[2][2]; /* * We optimize the case when rsti(t)->vma_io is empty. @@ -1466,6 +1468,44 @@ static int prepare_vma_ios(struct pstree_item *t, struct task_restore_args *ta) return -1; ta->vma_ios_fd = img_raw_fd(pages); + + if (!opts.encrypt) { + ta->encrypted_pages = false; + ta->decryption_pipe_fd_w = -1; + ta->decryption_pipe_fd_r = -1; + } else { + pid_t helper_pid, *child; + ta->encrypted_pages = true; + + if (pipe(pipe_fds[0])) { + pr_perror("Failed to create pipe"); + return -1; + } + if (pipe(pipe_fds[1])) { + pr_perror("Failed to create pipe"); + return -1; + } + + helper_pid = tls_vma_io_pipe(ta->vma_ios_fd, pipe_fds); + if (helper_pid < 0) { + pr_err("Failed to setup VMA IO pipe\n"); + return -1; + } + + /* Add PID of the helper task used to decrypt images, so that the + * restorer can wait() for it when the restore stage is done. 
+ */ + ta->helpers = (pid_t *)rst_mem_align_cpos(RM_PRIVATE); + child = rst_mem_alloc(sizeof(*child), RM_PRIVATE); + ta->helpers_n++; + *child = helper_pid; + + close(pipe_fds[0][0]); + close(pipe_fds[1][1]); + ta->decryption_pipe_fd_w = pipe_fds[0][1]; + ta->decryption_pipe_fd_r = pipe_fds[1][0]; + } + return pagemap_render_iovec(&rsti(t)->vma_io, ta); } diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 94f4774148..20c21dc11d 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -257,19 +257,52 @@ static int write_pages_loc(struct page_xfer *xfer, int p, unsigned long len) ssize_t ret; ssize_t curr = 0; - while (1) { - ret = splice(p, NULL, img_raw_fd(xfer->pi), NULL, len - curr, SPLICE_F_MOVE); - if (ret == -1) { - pr_perror("Unable to spice data"); - return -1; + if (opts.encrypt) { + uint8_t buf[PAGE_SIZE]; + + /* We encrypt each page separately to enable decryption + * of arbitrary pages during restore. This is required + * for auto-deduplication and incremental checkpointing. 
+ */ + BUG_ON((len % PAGE_SIZE) != 0); + + for (curr = 0; curr < len; curr += PAGE_SIZE) { + ret = read(p, buf, PAGE_SIZE); + if (ret < 0) { + pr_perror("Unable to read data"); + return -1; + } + if (ret == 0) { + pr_err("A pipe was closed unexpectedly\n"); + return -1; + } + BUG_ON(ret != PAGE_SIZE); + + if (tls_block_cipher_encrypt_data(buf, PAGE_SIZE)) { + pr_err("Failed to encrypt data\n"); + return -1; + } + ret = write(img_raw_fd(xfer->pi), buf, PAGE_SIZE); + if (ret != PAGE_SIZE) { + pr_perror("Unable to write data %zd", ret); + return -1; + } } - if (ret == 0) { - pr_err("A pipe was closed unexpectedly\n"); - return -1; + } else { + while (1) { + ret = splice(p, NULL, img_raw_fd(xfer->pi), NULL, len - curr, SPLICE_F_MOVE); + if (ret == -1) { + pr_perror("Unable to spice data"); + return -1; + } + if (ret == 0) { + pr_err("A pipe was closed unexpectedly\n"); + return -1; + } + curr += ret; + if (curr == len) + break; } - curr += ret; - if (curr == len) - break; } return 0; diff --git a/criu/pagemap.c b/criu/pagemap.c index 85bb922596..e7184eb4e1 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -13,6 +13,7 @@ #include "restorer.h" #include "rst-malloc.h" #include "page-xfer.h" +#include "tls.h" #include "fault-injection.h" #include "xmalloc.h" @@ -261,6 +262,16 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr, unsigned l break; } + if (opts.encrypt) { + /* We need to make sure to read the full content of pages before decrypting the data */ + for (int i = 0; i < len; i += PAGE_SIZE) { + if (tls_block_cipher_decrypt_data(buf + i, PAGE_SIZE)) { + pr_err("Failed to decrypt data\n"); + return -1; + } + } + } + if (opts.auto_dedup && !pr->disable_dedup) { ret = punch_hole(pr, pr->pi_off, len, false); if (ret == -1) @@ -536,6 +547,7 @@ static int process_async_reads(struct page_read *pr) list_for_each_entry_safe(piov, n, &pr->async, l) { ssize_t ret; struct iovec *iovs = piov->to; + int iovcnt = piov->nr; pr_debug("Read piov iovs %d, from 
%ju, len %ju, first %p:%zu\n", piov->nr, piov->from, piov->end - piov->from, piov->to->iov_base, piov->to->iov_len); @@ -578,6 +590,18 @@ static int process_async_reads(struct page_read *pr) BUG_ON(pr->io_complete); /* FIXME -- implement once needed */ + if (opts.encrypt) { + /* We need to make sure to read the full content of pages before decrypting the data */ + for (int i = 0; i < iovcnt; i++) { + for (int j = 0; j < iovs[i].iov_len; j += PAGE_SIZE) { + if (tls_block_cipher_decrypt_data(iovs[i].iov_base + j, PAGE_SIZE)) { + pr_err("Failed to decrypt data\n"); + return -1; + } + } + } + } + list_del(&piov->l); xfree(iovs); xfree(piov); diff --git a/criu/pie/restorer.c b/criu/pie/restorer.c index 51ed6ed4c8..7489922ced 100644 --- a/criu/pie/restorer.c +++ b/criu/pie/restorer.c @@ -1524,6 +1524,75 @@ static ssize_t preadv_limited(int fd, struct iovec *iovs, int nr, off_t offs, si return ret; } +/** + * decrypt_preadv_limited() is similar to preadv_limited(), + * but uses pipes to communicate with a helper process that + * decrypts the content of pages. 
+ */ +static ssize_t decrypt_preadv_limited(int rfd, int wfd, struct iovec *iovs, int nr, off_t offs, size_t max_to_read) +{ + size_t saved_last_iov_len = 0; + ssize_t ret; + ssize_t preadv_ret; + pid_t local_pid = sys_getpid(); + + if (max_to_read) { + for (int i = 0; i < nr; ++i) { + if (iovs[i].iov_len <= max_to_read) { + max_to_read -= iovs[i].iov_len; + continue; + } + + if (!max_to_read) { + nr = i; + break; + } + + saved_last_iov_len = iovs[i].iov_len; + iovs[i].iov_len = max_to_read; + nr = i + 1; + break; + } + } + + ret = sys_write(wfd, &local_pid, sizeof(pid_t)); + if (ret < 0) { + return -1; + } + + ret = sys_write(wfd, &offs, sizeof(off_t)); + if (ret < 0) { + return -1; + } + + ret = sys_write(wfd, &nr, sizeof(int)); + if (ret < 0) { + return -1; + } + + for (int i = 0; i < nr; i++) { + ret = sys_write(wfd, &iovs[i].iov_len, sizeof(size_t)); + if (ret < 0) { + return -1; + } + + ret = sys_write(wfd, &iovs[i].iov_base, sizeof(void *)); + if (ret < 0) { + return -1; + } + } + + ret = sys_read(rfd, &preadv_ret, sizeof(ssize_t)); + if (ret < 0) { + return -1; + } + + if (saved_last_iov_len) + iovs[nr - 1].iov_len = saved_last_iov_len; + + return preadv_ret; +} + /* * In the worst case buf size should be: * sizeof(struct inotify_event) * 2 + PATH_MAX @@ -1816,8 +1885,15 @@ __visible long __export_restore_task(struct task_restore_args *args) * If we're requested to punch holes in the file after reading we do * it to save memory. Limit the reads then to an arbitrary block size. */ - r = preadv_limited(args->vma_ios_fd, iovs, nr, rio->off, - args->auto_dedup ? AUTO_DEDUP_OVERHEAD_BYTES : 0); + if (args->encrypted_pages) { + r = decrypt_preadv_limited(args->decryption_pipe_fd_r, args->decryption_pipe_fd_w, iovs, + nr, rio->off, + args->auto_dedup ? AUTO_DEDUP_OVERHEAD_BYTES : 0); + } else { + r = preadv_limited(args->vma_ios_fd, iovs, nr, rio->off, + args->auto_dedup ? 
AUTO_DEDUP_OVERHEAD_BYTES : 0); + } + if (r < 0) { pr_err("Can't read pages data (%d)\n", (int)r); goto core_restore_end; @@ -1853,6 +1929,16 @@ __visible long __export_restore_task(struct task_restore_args *args) rio = ((void *)rio) + RIO_SIZE(rio->nr_iovs); } + /* + * Close PIPEs used for communicating with helper processes. + * See tls_vma_io_pipe(). + */ + if (args->encrypted_pages) { + pr_debug("Closing decryption pipe\n"); + sys_close(args->decryption_pipe_fd_r); + sys_close(args->decryption_pipe_fd_w); + } + if (args->vma_ios_fd != -1) sys_close(args->vma_ios_fd); diff --git a/criu/tls.c b/criu/tls.c index a95a58d967..cd49843013 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -55,6 +55,20 @@ static int tls_sk_flags = 0; static uint8_t token[32]; static const gnutls_cipher_algorithm_t stream_cipher_algorithm = GNUTLS_CIPHER_CHACHA20_POLY1305; +/* AES-XTS is used for encryption of image files. + * XTS uses two 256-bits AES keys - one key is used to perform + * the AES block encryption; the other is used to encrypt what is + * known as a "tweak value." This encrypted tweak is further modified + * with a Galois polynomial function (GF) and XOR with both the plain + * text and the cipher text of each block. This ensures that blocks of + * identical data will not produce identical cipher text. 
+ */ +static gnutls_cipher_hd_t block_cipher_handle; +static gnutls_datum_t aes_xts_key, aes_xts_iv; +static unsigned int cipher_block_size; +static const gnutls_cipher_algorithm_t block_cipher_algorithm = GNUTLS_CIPHER_AES_256_XTS; + + void tls_terminate_session(bool async) { int ret; @@ -431,6 +445,29 @@ static inline int _tls_generate_token(void) return gnutls_rnd(GNUTLS_RND_KEY, &token, sizeof(token)); } +static inline int _aes_xts_generate_key(void) +{ + aes_xts_key.size = gnutls_cipher_get_key_size(block_cipher_algorithm); + aes_xts_key.data = xmalloc(aes_xts_key.size); + pr_debug("Generating encryption key (%u bytes)\n", aes_xts_key.size); + return gnutls_rnd(GNUTLS_RND_KEY, aes_xts_key.data, aes_xts_key.size); +} + +static inline int _aes_xts_generate_iv(void) +{ + aes_xts_iv.size = gnutls_cipher_get_iv_size(block_cipher_algorithm); + aes_xts_iv.data = xmalloc(aes_xts_iv.size); + pr_debug("Generating encryption IV (%u bytes)\n", aes_xts_iv.size); + return gnutls_rnd(GNUTLS_RND_NONCE, aes_xts_iv.data, aes_xts_iv.size); +} + +static int _aes_xts_cipher_init(void) +{ + pr_debug("Initializing %s cipher\n", gnutls_cipher_get_name(block_cipher_algorithm)); + cipher_block_size = gnutls_cipher_get_block_size(block_cipher_algorithm); + return gnutls_cipher_init(&block_cipher_handle, block_cipher_algorithm, &aes_xts_key, &aes_xts_iv); +} + /** * tls_initialize_cipher initializes GnuTLS, loads a public key, * and initializes a cipher context that is used to encrypt the @@ -486,6 +523,24 @@ int tls_initialize_cipher(void) return -1; } + ret = _aes_xts_generate_key(); + if (ret < 0) { + tls_perror("Failed to generate key", ret); + return -1; + } + + ret = _aes_xts_generate_iv(); + if (ret < 0) { + tls_perror("Failed to generate iv", ret); + return -1; + } + + ret = _aes_xts_cipher_init(); + if (ret < 0) { + tls_perror("Failed to initialize cipher", ret); + return -1; + } + gnutls_x509_crt_deinit(crt); return 0; @@ -627,33 +682,39 @@ int 
tls_initialize_cipher_from_image(void) if (!x509_key) return -1; + /* Initialize private key object */ ret = gnutls_privkey_init(&privkey); if (ret < 0) { tls_perror("Failed to initialize private key", ret); return -1; } + /* Import private key */ ret = gnutls_privkey_import_x509(privkey, x509_key, 0); if (ret < 0) { tls_perror("Failed to import private key", ret); return -1; } + /* Load entry from cipher image */ ret = pb_read_one(img, &ce, PB_CIPHER); if (ret < 0) { pr_err("Failed to read cipher entry\n"); goto out_close; } + pr_debug("Loading ChaCha20-Poly1305 key from cipher image\n"); + + /* Decrypt token */ ciphertext.data = ce->token.data; ciphertext.size = ce->token.len; - ret = gnutls_privkey_decrypt_data(privkey, 0, &ciphertext, &decrypted_token); if (ret < 0) { tls_perror("Failed to decrypt token data", ret); goto out_close; } + /* Validate token size */ if (decrypted_token.size != sizeof(token)) { pr_err("Invalid token size (%d != %lu)\n", decrypted_token.size, sizeof(token)); goto out_close; @@ -664,6 +725,43 @@ int tls_initialize_cipher_from_image(void) goto out_close; } + pr_debug("Loading AES key from cipher image\n"); + + /* Decrypt AES key */ + ciphertext.data = ce->aes_key.data; + ciphertext.size = ce->aes_key.len; + ret = gnutls_privkey_decrypt_data(privkey, 0, &ciphertext, &decrypted_token); + if (ret < 0) { + tls_perror("Failed to decrypt key data", ret); + goto out_close; + } + + /* Validate AES key size */ + aes_xts_key.size = gnutls_cipher_get_key_size(block_cipher_algorithm); + if (decrypted_token.size != aes_xts_key.size) { + pr_err("Invalid key size (%d != %u)\n", decrypted_token.size, aes_xts_key.size); + goto out_close; + } + aes_xts_key.data = xmalloc(aes_xts_key.size); + memcpy(aes_xts_key.data, decrypted_token.data, decrypted_token.size); + + pr_debug("Loading IV from cipher image\n"); + + aes_xts_iv.size = gnutls_cipher_get_iv_size(block_cipher_algorithm); + if (ce->aes_iv.len != aes_xts_iv.size) { + pr_err("Invalid IV size (%lu != 
%u)\n", ce->aes_iv.len, aes_xts_iv.size); + goto out_close; + } + aes_xts_iv.data = xmalloc(aes_xts_iv.size); + memcpy(aes_xts_iv.data, ce->aes_iv.data, aes_xts_iv.size); + + /* Initialize AES-XTS cipher context */ + ret = _aes_xts_cipher_init(); + if (ret < 0) { + tls_perror("Failed to initialize cipher", ret); + return -1; + } + ret = 0; out_close: close_image(img); @@ -737,6 +835,20 @@ int write_img_cipher(void) ce.token.len = ciphertext.size; ce.token.data = ciphertext.data; + plaintext.data = aes_xts_key.data; + plaintext.size = aes_xts_key.size; + ret = _encrypt_data_with_pubkey(&plaintext, &ciphertext); + if (ret < 0) { + return -1; + } + ce.has_aes_key = true; + ce.aes_key.len = ciphertext.size; + ce.aes_key.data = ciphertext.data; + + ce.has_aes_iv = true; + ce.aes_iv.len = aes_xts_iv.size; + ce.aes_iv.data = aes_xts_iv.data; + pr_debug("Writing cipher image\n"); img = open_image(CR_FD_CIPHER, O_DUMP); if (!img) @@ -1112,3 +1224,171 @@ int tls_decryption_pipe(int input_fd, int pipe_write_fd) } return 0; } + +int tls_block_cipher_encrypt_data(void *ptext, size_t ptext_len) +{ + int ret; + + ret = gnutls_cipher_encrypt2(block_cipher_handle, ptext, ptext_len, (void *)ptext, ptext_len); + if (ret < 0) { + tls_perror("Failed to encrypt data", ret); + return -1; + } + return 0; +} + +int tls_block_cipher_decrypt_data(void *ctext, size_t ctext_len) +{ + int ret; + + ret = gnutls_cipher_decrypt2(block_cipher_handle, ctext, ctext_len, (void *)ctext, ctext_len); + if (ret < 0) { + tls_perror("Failed to decrypt data", ret); + return -1; + } + return 0; +} + +/** + * tls_vma_io_pipe forks a child process that reads encrypted data from + * the pages_img_fd and decrypts the data. It uses process_vm_writev() + * to write the decrypted data to the address space of remote process. + * The pipe_read_fd and pipe_write_fd are used to communicate with the + * restorer process (see decrypt_preadv_limited() in criu/pie/restorer.c). 
+ */ +int tls_vma_io_pipe(int pages_img_fd, int pipe_fds[2][2]) +{ + pid_t child_pid; + int ret; + int pipe_read_fd = pipe_fds[0][0], pipe_write_fd = pipe_fds[1][1]; + + child_pid = fork(); + if (child_pid == -1) { + pr_perror("Failed to fork"); + return -1; + } + + if (child_pid > 0) { + return child_pid; + } + + close(pipe_fds[1][0]); + close(pipe_fds[0][1]); + + child_pid = getpid(); + + while (1) { + int nr; + off_t offs; + pid_t remote_pid; + struct iovec *local_iovs, *remote_iovs; + size_t iov_len, total_len = 0; + ssize_t preadv_ret; + + /* Read remote PID from pipe. This is the PID value used + * by process_vm_writev() to identify the remote process + */ + ret = read(pipe_read_fd, &remote_pid, sizeof(pid_t)); + if (ret < 0) { + pr_perror("Failed reading offs"); + exit(1); + } + if (ret == 0) { + break; /* EOF */ + } + + /* Read offs and nr from pipe. These are the offset and + * number of iovecs used by preadv() to read data from + * the pages image. The data is then decrypted and written + * to the remote process using process_vm_writev(). */ + ret = read(pipe_read_fd, &offs, sizeof(off_t)); + if (ret < 0) { + pr_perror("Failed reading offs"); + exit(1); + } + + ret = read(pipe_read_fd, &nr, sizeof(int)); + if (ret < 0) { + pr_perror("Failed reading nr"); + exit(1); + } + + /* local_iovs are used to read encrypted data from pages image + * remote_iovs are used to write decrypted data to remote process + * See decrypt_preadv_limited() in criu/pie/restorer.c and man page + * for process_vm_writev(2). 
*/ + local_iovs = xmalloc(nr * sizeof(struct iovec)); + remote_iovs = xmalloc(nr * sizeof(struct iovec)); + + for (int i = 0; i < nr; i++) { + ret = read(pipe_read_fd, &iov_len, sizeof(size_t)); + if (ret < -1) { + pr_perror("Failed reading iov_len"); + exit(1); + } + + /* process_vm_writev() would fail with EINVAL if the + * sum of the iov_len values overflows a ssize_t value */ + if ((iov_len + total_len) > SSIZE_MAX) { + pr_err("Invalid iov_len value\n"); + exit(1); + } + + local_iovs[i].iov_len = iov_len; + remote_iovs[i].iov_len = iov_len; + total_len += iov_len; + + local_iovs[i].iov_base = xmalloc(iov_len); + if (local_iovs[i].iov_base == NULL) { + exit(1); + } + + ret = read(pipe_read_fd, &remote_iovs[i].iov_base, sizeof(void *)); + if (ret < -1) { + pr_perror("Failed reading iov_len"); + exit(1); + } + } + + /* Read encrypted data from pages image into local_iovs */ + preadv_ret = preadv(pages_img_fd, local_iovs, nr, offs); + if (preadv_ret != total_len) { + pr_perror("Failed reading iovs from image"); + exit(1); + } + + /* Decrypt content of images */ + for (int i = 0; i < nr; i++) { + for (int j = 0; j < local_iovs[i].iov_len; j += PAGE_SIZE) { + if (tls_block_cipher_decrypt_data(local_iovs[i].iov_base + j, PAGE_SIZE)) { + pr_err("Failed to decrypt data\n"); + exit(1); + } + } + } + + /* Write decrypted data to remote process address space */ + ret = process_vm_writev(remote_pid, local_iovs, nr, remote_iovs, nr, 0); + if (ret < 0) { + pr_perror("Failed writing iovs to remote process"); + exit(1); + } + + /* Send preadv() return value to the restorer process so + * that it can return it to the caller */ + ret = write(pipe_write_fd, &preadv_ret, sizeof(ssize_t)); + if (ret < 0) { + pr_perror("Failed writing ret"); + exit(1); + } + + /* Cleanup local_iovs and remote_iovs */ + for (int i = 0; i < nr; i++) { + xfree(local_iovs[i].iov_base); + } + xfree(local_iovs); + xfree(remote_iovs); + } + + exit(0); +} diff --git a/images/cipher.proto 
b/images/cipher.proto index 801043bc51..c272139568 100644 --- a/images/cipher.proto +++ b/images/cipher.proto @@ -4,4 +4,6 @@ syntax = "proto2"; message cipher_entry { required bytes token = 1; + optional bytes aes_key = 2; + optional bytes aes_iv = 3; } From 3d9d845ea08e7129dc35e7256310ff1f9d7a552f Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Thu, 29 Feb 2024 14:07:12 +0000 Subject: [PATCH 10/12] tls: verify the integrity of memory pages The AES-XTS cipher does not provide integrity verification. In this patch we add a verification mechanism based on the HMAC-SHA-256 algorithm. In order to support iterative checkpointing and memory deduplication with encrypted memory, and to avoid storing an HMAC for each memory page, we compute the XOR of the HMAC values of all memory pages and store this value in cipher.img. The XOR computation also allows us to address the problem that memory pages are read during restore in a different order than they are written during checkpoint. In addition, to ensure that memory pages are restored in the correct order, we include the PID and VMA address associated with each page in the HMAC computation. The following example illustrates the HMAC value computation: H_n = HMAC(PID + VMA + MEMORY + KEY) hmac_value = H_1 ^ H_2 ^ ...
^ H_n - PID: PID associated with the memory page - VMA: virtual memory address associated with memory page - MEMORY: encrypted content of the memory page - KEY: secret key - H_n: HMAC of the n-th memory page - hmac_value: value stored in cipher.img during checkpoint, and used for integrity verification during restore Signed-off-by: Radostin Stoyanov --- criu/cr-restore.c | 4 + criu/include/pagemap.h | 2 +- criu/include/tls.h | 8 ++ criu/mem.c | 2 +- criu/page-xfer.c | 12 +++ criu/pagemap.c | 66 ++++++++++++++-- criu/tls.c | 167 ++++++++++++++++++++++++++++++++++++++++- images/cipher.proto | 1 + 8 files changed, 252 insertions(+), 10 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index beb63a2b24..2b44a58970 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2176,6 +2176,10 @@ static int restore_root_task(struct pstree_item *init) goto out_kill; } + /* Verify the integrity of encrypted memory pages */ + if (!tls_verify_hmac()) + goto out_kill_network_unlocked; + /* * There is no need to call try_clean_remaps() after this point, * as restore went OK and all ghosts were removed by the openers.
diff --git a/criu/include/pagemap.h b/criu/include/pagemap.h index 3ae15deb9c..cf9d360bc7 100644 --- a/criu/include/pagemap.h +++ b/criu/include/pagemap.h @@ -105,7 +105,7 @@ extern int open_page_read_at(int dfd, unsigned long id, struct page_read *pr, in struct task_restore_args; -int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long len, struct list_head *to); +int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long vaddr, unsigned long len, struct list_head *to); int pagemap_render_iovec(struct list_head *from, struct task_restore_args *ta); /* diff --git a/criu/include/tls.h b/criu/include/tls.h index 3c359fbc00..d10aab605b 100644 --- a/criu/include/tls.h +++ b/criu/include/tls.h @@ -39,6 +39,10 @@ int tls_decryption_pipe(int intput_file_fd, int pipe_write_fd); int tls_block_cipher_encrypt_data(void *ptext, size_t ptext_len); int tls_block_cipher_decrypt_data(void *ctext, size_t ctext_len); int tls_vma_io_pipe(int pages_img_fd, int pipe_fds[2][2]); +bool tls_verify_hmac(void); +void tls_set_hmac_vma_metadata(uint64_t vma_addr); +void tls_increment_hmac_vma_metadata(uint64_t n); +void tls_set_hmac_pid_metadata(pid_t pid); #else /* CONFIG_GNUTLS */ @@ -61,6 +65,10 @@ int tls_vma_io_pipe(int pages_img_fd, int pipe_fds[2][2]); #define tls_block_cipher_decrypt_data(ctext, ctext_len) (-1) #define tls_vma_io_pipe(pages_img_fd, pipe_fds) (-1) #define write_img_cipher() (0) +#define tls_verify_hmac() (true) +#define tls_set_hmac_vma_metadata(vma_addr) +#define tls_increment_hmac_vma_metadata(n) +#define tls_set_hmac_pid_metadata(pid) #endif /* CONFIG_HAS_GNUTLS */ diff --git a/criu/mem.c b/criu/mem.c index 0aa220c755..4e71af07a1 100644 --- a/criu/mem.c +++ b/criu/mem.c @@ -1155,7 +1155,7 @@ static int restore_priv_vma_content(struct pstree_item *t, struct page_read *pr) BUG(); } - if (pagemap_enqueue_iovec(pr, (void *)va, len, vma_io)) + if (pagemap_enqueue_iovec(pr, (void *)va, va, len, vma_io)) return -1; pr->skip_pages(pr, len); 
diff --git a/criu/page-xfer.c b/criu/page-xfer.c index 20c21dc11d..78cd1dbe24 100644 --- a/criu/page-xfer.c +++ b/criu/page-xfer.c @@ -282,6 +282,11 @@ static int write_pages_loc(struct page_xfer *xfer, int p, unsigned long len) pr_err("Failed to encrypt data\n"); return -1; } + + /* The incremented VMA address will be used to compute + * HMAC in next iteration */ + tls_increment_hmac_vma_metadata(PAGE_SIZE); + ret = write(img_raw_fd(xfer->pi), buf, PAGE_SIZE); if (ret != PAGE_SIZE) { pr_perror("Unable to write data %zd", ret); @@ -889,6 +894,9 @@ int page_xfer_predump_pages(int pid, struct page_xfer *xfer, struct page_pipe *p if (xfer->write_pagemap(xfer, &iov, flags)) goto err; + /* Set vma address used to compute HMAC value */ + tls_set_hmac_vma_metadata(encode_pointer(iov.iov_base)); + if (xfer->write_pages(xfer, ppb->p[0], iov.iov_len)) goto err; } @@ -936,6 +944,10 @@ int page_xfer_dump_pages(struct page_xfer *xfer, struct page_pipe *pp) if (xfer->write_pagemap(xfer, &iov, flags)) return -1; + + /* Set vma address used to compute HMAC value */ + tls_set_hmac_vma_metadata(encode_pointer(iov.iov_base)); + if ((flags & PE_PRESENT) && xfer->write_pages(xfer, ppb->p[0], iov.iov_len)) return -1; } diff --git a/criu/pagemap.c b/criu/pagemap.c index e7184eb4e1..703ebc04a5 100644 --- a/criu/pagemap.c +++ b/criu/pagemap.c @@ -27,6 +27,11 @@ #define MAX_BUNCH_SIZE 256 +struct vaddr_array { + uint64_t vaddr; + unsigned long len; +}; + /* * One "job" for the preadv() syscall in pagemap.c */ @@ -36,6 +41,17 @@ struct page_read_iov { struct iovec *to; /* destination iovs */ unsigned int nr; /* their number */ + /* We use the virtual address (vaddr) of memory pages to compute HMAC + * when decrypting pages during restore and verify the integrity of page + * data (see tls_block_cipher_decrypt_data() in tls.c). Each iovec may + * contain multiple pages with different vaddr value. Thus, we keep an + * array with both vaddr and length for nonsequential pages stored in iov.
+ * + * See pagemap_enqueue_iovec() and process_async_reads() for more details. + */ + struct vaddr_array vaddr_array[IOV_MAX]; + unsigned int vaddr_array_size; + struct list_head l; }; @@ -264,11 +280,13 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr, unsigned l if (opts.encrypt) { /* We need to make sure to read the full content of pages before decrypting the data */ + tls_set_hmac_vma_metadata(pr->cvaddr); for (int i = 0; i < len; i += PAGE_SIZE) { if (tls_block_cipher_decrypt_data(buf + i, PAGE_SIZE)) { pr_err("Failed to decrypt data\n"); return -1; } + tls_increment_hmac_vma_metadata(PAGE_SIZE); } } @@ -281,7 +299,7 @@ static int read_local_page(struct page_read *pr, unsigned long vaddr, unsigned l return 0; } -static int enqueue_async_iov(struct page_read *pr, void *buf, unsigned long len, struct list_head *to) +static int enqueue_async_iov(struct page_read *pr, void *buf, unsigned long vaddr, unsigned long len, struct list_head *to) { struct page_read_iov *pr_iov; struct iovec *iov; @@ -293,6 +311,10 @@ static int enqueue_async_iov(struct page_read *pr, void *buf, unsigned long len, pr_iov->from = pr->pi_off; pr_iov->end = pr->pi_off + len; + pr_iov->vaddr_array[0].vaddr = vaddr; + pr_iov->vaddr_array[0].len = len; + pr_iov->vaddr_array_size = 1; + iov = xzalloc(sizeof(*iov)); if (!iov) { xfree(pr_iov); @@ -335,7 +357,7 @@ int pagemap_render_iovec(struct list_head *from, struct task_restore_args *ta) return 0; } -int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long len, struct list_head *to) +int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long vaddr, unsigned long len, struct list_head *to) { struct page_read_iov *cur_async = NULL; struct iovec *iov; @@ -350,7 +372,7 @@ int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long len, st * Start the new preadv request here. 
*/ if (!cur_async || pr->pi_off != cur_async->end) - return enqueue_async_iov(pr, buf, len, to); + return enqueue_async_iov(pr, buf, vaddr, len, to); /* * This read is pure continuation of the previous one. Let's @@ -365,7 +387,7 @@ int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long len, st unsigned int n_iovs = cur_async->nr + 1; if (n_iovs >= IOV_MAX) - return enqueue_async_iov(pr, buf, len, to); + return enqueue_async_iov(pr, buf, vaddr, len, to); iov = xrealloc(cur_async->to, n_iovs * sizeof(*iov)); if (!iov) @@ -380,6 +402,18 @@ int pagemap_enqueue_iovec(struct page_read *pr, void *buf, unsigned long len, st cur_async->nr = n_iovs; } + if (opts.encrypt) { + if (cur_async->vaddr_array[cur_async->vaddr_array_size - 1].vaddr == pr->pe->vaddr) { + /* Extend the last vaddr entry */ + cur_async->vaddr_array[cur_async->vaddr_array_size - 1].len += len; + } else { + /* Add new vaddr entry */ + cur_async->vaddr_array[cur_async->vaddr_array_size].vaddr = pr->pe->vaddr; + cur_async->vaddr_array[cur_async->vaddr_array_size].len = len; + cur_async->vaddr_array_size++; + } + } + cur_async->end += len; return 0; @@ -397,7 +431,7 @@ static int maybe_read_page_local(struct page_read *pr, unsigned long vaddr, int * cached read. 
*/ if ((flags & (PR_ASYNC | PR_ASAP)) == PR_ASYNC) - ret = pagemap_enqueue_iovec(pr, buf, len, &pr->async); + ret = pagemap_enqueue_iovec(pr, buf, vaddr, len, &pr->async); else { ret = read_local_page(pr, vaddr, len, buf); if (ret == 0 && pr->io_complete) @@ -592,12 +626,32 @@ static int process_async_reads(struct page_read *pr) if (opts.encrypt) { /* We need to make sure to read the full content of pages before decrypting the data */ + int64_t idx = 0, len; + for (int i = 0; i < iovcnt; i++) { - for (int j = 0; j < iovs[i].iov_len; j += PAGE_SIZE) { + + /* Set vma address used to compute HMAC value */ + tls_set_hmac_vma_metadata(piov->vaddr_array[idx].vaddr); + len = piov->vaddr_array[idx].len; + + for (size_t j = 0; j < iovs[i].iov_len; j += PAGE_SIZE) { if (tls_block_cipher_decrypt_data(iovs[i].iov_base + j, PAGE_SIZE)) { pr_err("Failed to decrypt data\n"); return -1; } + + len -= PAGE_SIZE; + BUG_ON(len < 0); + + if (len > 0) { + /* Increment virtual address for next page */ + tls_increment_hmac_vma_metadata(PAGE_SIZE); + } else { + /* Move to the next vaddr_array entry */ + idx++; + len = piov->vaddr_array[idx].len; + tls_set_hmac_vma_metadata(piov->vaddr_array[idx].vaddr); + } } } } diff --git a/criu/tls.c b/criu/tls.c index cd49843013..2d3c326802 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -11,6 +12,7 @@ #include #include +#include "types.h" #include "page.h" #include "imgset.h" #include "images/cipher.pb-c.h" @@ -68,6 +70,55 @@ static gnutls_datum_t aes_xts_key, aes_xts_iv; static unsigned int cipher_block_size; static const gnutls_cipher_algorithm_t block_cipher_algorithm = GNUTLS_CIPHER_AES_256_XTS; +#define HMAC_SIZE 32 /* SHA256 */ +#define HMAC_ALGO GNUTLS_MAC_SHA256 +/* We use shared memory to compute HMAC during restore + * as some pages are decrypted in a child (helper) process. + * See tls_vma_io_pipe(). 
+ */ +static uint8_t checkpoint_hmac_digest[HMAC_SIZE]; +static uint8_t *restore_hmac_digest; +static gnutls_hmac_hd_t hmac_ctx; + +struct hmac_metadata_t { + uint64_t vma_vaddr; + pid_t pid; +}; +static struct hmac_metadata_t hmac_metadata; + +void tls_increment_hmac_vma_metadata(uint64_t n) +{ + if (opts.encrypt) + hmac_metadata.vma_vaddr += n; +} + +void tls_set_hmac_vma_metadata(uint64_t vma_vaddr) +{ + if (opts.encrypt) + hmac_metadata.vma_vaddr = vma_vaddr; +} + +void tls_set_hmac_pid_metadata(pid_t pid) +{ + if (opts.encrypt) + hmac_metadata.pid = pid; +} + +static void tls_hmac_init(void) +{ + gnutls_hmac_init(&hmac_ctx, HMAC_ALGO, token, sizeof(token)); +} + +static inline void pr_digest(const char *prefix, const void *_str) +{ + const char *str = _str; + char output[HMAC_SIZE + 1]; + + for (int i = 0; i < HMAC_SIZE; i++) { + snprintf(output + i, 4, "%02x ", (str[i] & 0xFF)); + } + pr_debug("%s: %s\n", prefix, output); +} void tls_terminate_session(bool async) { @@ -541,6 +592,8 @@ int tls_initialize_cipher(void) return -1; } + tls_hmac_init(); + gnutls_x509_crt_deinit(crt); return 0; @@ -652,7 +705,7 @@ static gnutls_x509_privkey_t rsa_load_pem_key(const char *privkey_file_path) */ int tls_initialize_cipher_from_image(void) { - int ret; + int ret = -1; char *privkey_file_path = CRIU_KEY; struct cr_img *img; CipherEntry *ce; @@ -725,6 +778,31 @@ int tls_initialize_cipher_from_image(void) goto out_close; } + pr_debug("Loading HMAC from cipher image\n"); + + if (ce->has_hmac_digest == false) { + pr_err("Missing HMAC digest\n"); + goto out_close; + } + + if (ce->hmac_digest.len != HMAC_SIZE) { + pr_err("Invalid HMAC size (%lu)\n", ce->hmac_digest.len); + goto out_close; + } + + memcpy(checkpoint_hmac_digest, ce->hmac_digest.data, HMAC_SIZE); + tls_hmac_init(); + + /* Since part of the decryption during restore happens within a helper process + * created with tls_vma_io_pipe(), we use shared memory to compute a cumulative + * HMAC digest value.
+ */ + restore_hmac_digest = mmap(NULL, HMAC_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (restore_hmac_digest == MAP_FAILED) { + pr_perror("Can't allocate shared mem for HMAC"); + goto out_close; + } + pr_debug("Loading AES key from cipher image\n"); /* Decrypt AES key */ @@ -808,7 +886,9 @@ static int _encrypt_data_with_pubkey(gnutls_datum_t *plaintext, gnutls_datum_t * /** * write_img_cipher encrypts the token with RSA public key and writes - * it to cipher.img. + * it to cipher.img. It also writes HMAC digest of all encrypted memory + * pages. This function should be called after all memory pages have + * been encrypted. */ int write_img_cipher(void) { @@ -849,6 +929,13 @@ int write_img_cipher(void) ce.aes_iv.len = aes_xts_iv.size; ce.aes_iv.data = aes_xts_iv.data; + /* Save HMAC digest */ + ce.has_hmac_digest = true; + ce.hmac_digest.len = HMAC_SIZE; + ce.hmac_digest.data = checkpoint_hmac_digest; + + gnutls_hmac_deinit(hmac_ctx, NULL); + pr_debug("Writing cipher image\n"); img = open_image(CR_FD_CIPHER, O_DUMP); if (!img) @@ -1225,21 +1312,73 @@ int tls_decryption_pipe(int input_fd, int pipe_write_fd) return 0; } +static inline void hmac_xor(uint8_t *dest, const uint8_t *src, size_t n) +{ + for (size_t i = 0; i < n; ++i) { + dest[i] ^= src[i]; + } +} + int tls_block_cipher_encrypt_data(void *ptext, size_t ptext_len) { int ret; + uint8_t digest[HMAC_SIZE]; ret = gnutls_cipher_encrypt2(block_cipher_handle, ptext, ptext_len, (void *)ptext, ptext_len); if (ret < 0) { tls_perror("Failed to encrypt data", ret); return -1; } + + ret = gnutls_hmac(hmac_ctx, ptext, ptext_len); + if (ret < 0) { + tls_perror("Failed to compute HMAC", ret); + return -1; + } + + ret = gnutls_hmac(hmac_ctx, &(hmac_metadata.vma_vaddr), sizeof(uint64_t)); + if (ret < 0) { + tls_perror("Failed to compute HMAC of VMA address", ret); + return -1; + } + + ret = gnutls_hmac(hmac_ctx, &(hmac_metadata.pid), sizeof(pid_t)); + if (ret < 0) { + tls_perror("Failed to compute 
HMAC of PID", ret); + return -1; + } + + gnutls_hmac_output(hmac_ctx, digest); + hmac_xor(checkpoint_hmac_digest, digest, sizeof(digest)); + return 0; } int tls_block_cipher_decrypt_data(void *ctext, size_t ctext_len) { int ret; + uint8_t digest[HMAC_SIZE]; + + ret = gnutls_hmac(hmac_ctx, ctext, ctext_len); + if (ret < 0) { + tls_perror("Failed to compute HMAC", ret); + return -1; + } + + ret = gnutls_hmac(hmac_ctx, &(hmac_metadata.vma_vaddr), sizeof(uint64_t)); + if (ret < 0) { + tls_perror("Failed to compute HMAC of metadata", ret); + return -1; + } + + ret = gnutls_hmac(hmac_ctx, &(hmac_metadata.pid), sizeof(pid_t)); + if (ret < 0) { + tls_perror("Failed to compute HMAC of PID", ret); + return -1; + } + + gnutls_hmac_output(hmac_ctx, digest); + hmac_xor(restore_hmac_digest, digest, sizeof(digest)); ret = gnutls_cipher_decrypt2(block_cipher_handle, ctext, ctext_len, (void *)ctext, ctext_len); if (ret < 0) { @@ -1359,11 +1498,15 @@ int tls_vma_io_pipe(int pages_img_fd, int pipe_fds[2][2]) /* Decrypt content of images */ for (int i = 0; i < nr; i++) { + /* Set vma address used to compute HMAC value */ + tls_set_hmac_vma_metadata(encode_pointer(remote_iovs[i].iov_base)); + for (int j = 0; j < local_iovs[i].iov_len; j += PAGE_SIZE) { if (tls_block_cipher_decrypt_data(local_iovs[i].iov_base + j, PAGE_SIZE)) { pr_err("Failed to decrypt data\n"); exit(1); } + tls_increment_hmac_vma_metadata(PAGE_SIZE); } } @@ -1392,3 +1535,23 @@ int tls_vma_io_pipe(int pages_img_fd, int pipe_fds[2][2]) exit(0); } + +bool tls_verify_hmac(void) +{ + bool exit_val = true; + + if (opts.encrypt) { + gnutls_hmac_deinit(hmac_ctx, NULL); + if (!gnutls_memcmp(restore_hmac_digest, checkpoint_hmac_digest, HMAC_SIZE)) { + pr_debug("HMAC verification successful\n"); + } else { + pr_err("HMAC mismatch\n"); + pr_digest("HMAC of restored memory", restore_hmac_digest); + pr_digest("Expected HMAC", checkpoint_hmac_digest); + exit_val = false; + } + munmap(restore_hmac_digest, HMAC_SIZE); + } + + return 
exit_val; +} \ No newline at end of file diff --git a/images/cipher.proto b/images/cipher.proto index c272139568..e059e42287 100644 --- a/images/cipher.proto +++ b/images/cipher.proto @@ -6,4 +6,5 @@ message cipher_entry { required bytes token = 1; optional bytes aes_key = 2; optional bytes aes_iv = 3; + optional bytes hmac_digest = 4; } From 4ef699c6f620d124e07e5d2b4c4359fc2a5ee4c1 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Sun, 15 Dec 2024 13:34:30 +0000 Subject: [PATCH 11/12] stats: measure time for encryption/decryption Measure the time for data encryption and decryption with stream and block ciphers. Signed-off-by: Radostin Stoyanov --- criu/cr-restore.c | 10 ++++---- criu/include/stats.h | 6 +++++ criu/stats.c | 16 +++++++++++++ criu/tls.c | 55 ++++++++++++++++++++++++++++++-------------- images/stats.proto | 6 +++++ 5 files changed, 71 insertions(+), 22 deletions(-) diff --git a/criu/cr-restore.c b/criu/cr-restore.c index 2b44a58970..64e19dbdd1 100644 --- a/criu/cr-restore.c +++ b/criu/cr-restore.c @@ -2359,18 +2359,18 @@ int cr_restore_tasks(void) if (init_service_fd()) return 1; - if (check_img_inventory(/* restore = */ true) < 0) + if (init_stats(RESTORE_STATS)) goto err; - if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE)) - return -1; - if (tls_initialize_cipher_from_image()) goto err; - if (init_stats(RESTORE_STATS)) + if (check_img_inventory(/* restore = */ true) < 0) goto err; + if (cr_plugin_init(CR_PLUGIN_STAGE__RESTORE)) + return -1; + if (lsm_check_opts()) goto err; diff --git a/criu/include/stats.h b/criu/include/stats.h index d8dd159989..bfdfa5aa36 100644 --- a/criu/include/stats.h +++ b/criu/include/stats.h @@ -8,6 +8,9 @@ enum { TIME_MEMWRITE, TIME_IRMAP_RESOLVE, + TIME_STREAM_CIPHER_ENCRYPTION, + TIME_BLOCK_CIPHER_ENCRYPTION, + DUMP_TIME_NR_STATS, }; @@ -15,6 +18,9 @@ enum { TIME_FORK, TIME_RESTORE, + TIME_STREAM_CIPHER_DECRYPTION, + TIME_BLOCK_CIPHER_DECRYPTION, + RESTORE_TIME_NS_STATS, }; diff --git a/criu/stats.c b/criu/stats.c 
index 0a9b4f5d47..9b568cc90c 100644 --- a/criu/stats.c +++ b/criu/stats.c @@ -170,6 +170,14 @@ void write_stats(int what) ds_entry.has_irmap_resolve = true; encode_time(TIME_IRMAP_RESOLVE, &ds_entry.irmap_resolve); + if (opts.encrypt) { + ds_entry.has_stream_cipher_encryption_time = true; + encode_time(TIME_STREAM_CIPHER_ENCRYPTION, &ds_entry.stream_cipher_encryption_time); + + ds_entry.has_block_cipher_encryption_time = true; + encode_time(TIME_BLOCK_CIPHER_ENCRYPTION, &ds_entry.block_cipher_encryption_time); + } + ds_entry.pages_scanned = dstats->counts[CNT_PAGES_SCANNED]; ds_entry.pages_skipped_parent = dstats->counts[CNT_PAGES_SKIPPED_PARENT]; ds_entry.pages_written = dstats->counts[CNT_PAGES_WRITTEN]; @@ -198,6 +206,14 @@ void write_stats(int what) encode_time(TIME_FORK, &rs_entry.forking_time); encode_time(TIME_RESTORE, &rs_entry.restore_time); + if (opts.encrypt) { + rs_entry.has_stream_cipher_decryption_time = true; + encode_time(TIME_STREAM_CIPHER_DECRYPTION, &rs_entry.stream_cipher_decryption_time); + + rs_entry.has_block_cipher_decryption_time = true; + encode_time(TIME_BLOCK_CIPHER_DECRYPTION, &rs_entry.block_cipher_decryption_time); + } + name = "restore"; } else return; diff --git a/criu/tls.c b/criu/tls.c index 2d3c326802..a1ab318bf8 100644 --- a/criu/tls.c +++ b/criu/tls.c @@ -19,6 +19,7 @@ #include "protobuf.h" #include "cr_options.h" #include "xmalloc.h" +#include "stats.h" #include "tls.h" /* Compatibility with GnuTLS version < 3.5 */ @@ -964,7 +965,7 @@ int write_img_cipher(void) */ int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *nonce_data) { - int ret; + int ret, exit_code = -1; giovec_t iov[1]; gnutls_datum_t key; static gnutls_aead_cipher_hd_t handle = NULL; @@ -974,6 +975,8 @@ int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n if (!opts.encrypt) return -1; + timing_start(TIME_STREAM_CIPHER_ENCRYPTION); + if (handle == NULL) { key.data = token; key.size = 
gnutls_cipher_get_key_size(stream_cipher_algorithm); @@ -981,7 +984,7 @@ int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n ret = gnutls_aead_cipher_init(&handle, stream_cipher_algorithm, &key); if (ret < 0) { tls_perror("Failed to initialize cipher", ret); - return -1; + goto err; } } @@ -992,7 +995,7 @@ int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n ret = gnutls_rnd(GNUTLS_RND_NONCE, nonce_data, nonce_len); if (ret < 0) { tls_perror("Failed to generate random nonce", ret); - return -1; + goto err; } iov[0].iov_base = data; @@ -1001,10 +1004,13 @@ int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n ret = gnutls_aead_cipher_encryptv2(handle, nonce_data, nonce_len, NULL, 0, iov, 1, tag_data, &tag_size); if (ret < 0) { tls_perror("Failed to encrypt data", ret); - return -1; + goto err; } - return 0; + exit_code = 0; +err: + timing_stop(TIME_STREAM_CIPHER_ENCRYPTION); + return exit_code; } /** @@ -1014,7 +1020,7 @@ int tls_encrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n */ int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *nonce_data) { - int ret; + int ret, exit_code = -1; giovec_t iov[1]; gnutls_datum_t key; gnutls_aead_cipher_hd_t handle = NULL; @@ -1024,10 +1030,12 @@ int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n key.data = token; key.size = gnutls_cipher_get_key_size(stream_cipher_algorithm); + timing_start(TIME_STREAM_CIPHER_DECRYPTION); + ret = gnutls_aead_cipher_init(&handle, stream_cipher_algorithm, &key); if (ret < 0) { tls_perror("Failed to initialize cipher", ret); - return -1; + goto err; } iov[0].iov_base = data; @@ -1036,12 +1044,14 @@ int tls_decrypt_data(void *data, size_t data_size, uint8_t *tag_data, uint8_t *n ret = gnutls_aead_cipher_decryptv2(handle, nonce_data, nonce_len, NULL, 0, iov, 1, tag_data, tag_size); if (ret < 0) { tls_perror("Failed to decrypt data", ret); - 
return -1; + goto err; } + exit_code = ret; gnutls_aead_cipher_deinit(handle); - - return ret; +err: + timing_stop(TIME_STREAM_CIPHER_DECRYPTION); + return exit_code; } /** @@ -1321,19 +1331,21 @@ static inline void hmac_xor(uint8_t *dest, const uint8_t *src, size_t n) int tls_block_cipher_encrypt_data(void *ptext, size_t ptext_len) { - int ret; + int ret, exit_code = -1; uint8_t digest[HMAC_SIZE]; + timing_start(TIME_BLOCK_CIPHER_ENCRYPTION); + ret = gnutls_cipher_encrypt2(block_cipher_handle, ptext, ptext_len, (void *)ptext, ptext_len); if (ret < 0) { tls_perror("Failed to encrypt data", ret); - return -1; + goto err; } ret = gnutls_hmac(hmac_ctx, ptext, ptext_len); if (ret < 0) { tls_perror("Failed to compute HMAC", ret); - return -1; + goto err; } ret = gnutls_hmac(hmac_ctx, &(hmac_metadata.vma_vaddr), sizeof(uint64_t)); @@ -1351,15 +1363,20 @@ int tls_block_cipher_encrypt_data(void *ptext, size_t ptext_len) gnutls_hmac_output(hmac_ctx, digest); hmac_xor(checkpoint_hmac_digest, digest, sizeof(digest)); - return 0; + exit_code = 0; +err: + timing_stop(TIME_BLOCK_CIPHER_ENCRYPTION); + return exit_code; } int tls_block_cipher_decrypt_data(void *ctext, size_t ctext_len) { - int ret; + int ret, exit_code = -1; uint8_t digest[HMAC_SIZE]; + timing_start(TIME_BLOCK_CIPHER_DECRYPTION); ret = gnutls_hmac(hmac_ctx, ctext, ctext_len); + if (ret < 0) { tls_perror("Failed to compute HMAC", ret); return -1; @@ -1383,9 +1400,13 @@ int tls_block_cipher_decrypt_data(void *ctext, size_t ctext_len) ret = gnutls_cipher_decrypt2(block_cipher_handle, ctext, ctext_len, (void *)ctext, ctext_len); if (ret < 0) { tls_perror("Failed to decrypt data", ret); - return -1; + goto err; } - return 0; + + exit_code = 0; +err: + timing_stop(TIME_BLOCK_CIPHER_DECRYPTION); + return exit_code; } /** diff --git a/images/stats.proto b/images/stats.proto index 64e46181da..c38fa62a3e 100644 --- a/images/stats.proto +++ b/images/stats.proto @@ -22,6 +22,9 @@ message dump_stats_entry { optional uint64 
shpages_scanned = 12; optional uint64 shpages_skipped_parent = 13; optional uint64 shpages_written = 14; + + optional uint32 stream_cipher_encryption_time = 15; + optional uint32 block_cipher_encryption_time = 16; } message restore_stats_entry { @@ -32,6 +35,9 @@ message restore_stats_entry { required uint32 restore_time = 4; optional uint64 pages_restored = 5; + + optional uint32 stream_cipher_decryption_time = 6; + optional uint32 block_cipher_decryption_time = 7; } message stats_entry { From d4b1c376ac00ae8234752550db34928d90443e30 Mon Sep 17 00:00:00 2001 From: Radostin Stoyanov Date: Tue, 9 Jan 2024 15:30:52 +0000 Subject: [PATCH 12/12] scripts: add criu-keygen script This script, similar to ssh-keygen and certtool, makes it easier to generate and install a certificate and key to enable encryption support with CRIU. Signed-off-by: Radostin Stoyanov --- Documentation/Makefile | 1 + Documentation/criu-keygen.txt | 29 +++++++++++ Makefile | 1 + criu/Makefile | 3 ++ scripts/criu-keygen | 96 +++++++++++++++++++++++++++++++++++ 5 files changed, 130 insertions(+) create mode 100644 Documentation/criu-keygen.txt create mode 100755 scripts/criu-keygen diff --git a/Documentation/Makefile b/Documentation/Makefile index de0cc448dc..f6f6778887 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -13,6 +13,7 @@ endif FOOTER := footer.txt SRC1 += crit.txt SRC1 += criu-ns.txt +SRC1 += criu-keygen.txt SRC1 += compel.txt SRC1 += criu-amdgpu-plugin.txt SRC8 += criu.txt diff --git a/Documentation/criu-keygen.txt b/Documentation/criu-keygen.txt new file mode 100644 index 0000000000..76f5faa450 --- /dev/null +++ b/Documentation/criu-keygen.txt @@ -0,0 +1,29 @@ +CRIU-KEYGEN(1) +============== +include::footer.txt[] + +NAME +---- +criu-keygen - criu encryption key utility + +SYNOPSIS +-------- +*criu-keygen* [<options>] + +DESCRIPTION +----------- +The *criu-keygen* command generates and manages encryption keys for CRIU. +*criu-keygen* can create keys for use by CRIU.
The type of key to be +generated is specified with the *--type* option. If invoked without any arguments, +*criu-keygen* will generate an RSA key pair. + +A system administrator wishing to use CRIU with encryption would run *criu-keygen* +once to create a certificate and private key in '/etc/pki/criu/'. + +SEE ALSO +-------- +criu(8) + +AUTHOR +------ +The CRIU team diff --git a/Makefile b/Makefile index 60b78a0749..c29f6d9a7a 100644 --- a/Makefile +++ b/Makefile @@ -449,6 +449,7 @@ ruff: lib/pycriu/images/images.py \ scripts/criu-ns \ test/others/criu-ns/run.py \ + scripts/criu-keygen \ crit/*.py \ crit/crit/*.py \ scripts/uninstall_module.py \ diff --git a/criu/Makefile b/criu/Makefile index bafdd980bb..c5d2f422e6 100644 --- a/criu/Makefile +++ b/criu/Makefile @@ -147,12 +147,15 @@ install: $(obj)/criu $(Q) install -m 755 scripts/systemd-autofs-restart.sh $(DESTDIR)$(LIBEXECDIR)/criu/scripts $(E) " INSTALL " scripts/criu-ns $(Q) install -m 755 scripts/criu-ns $(DESTDIR)$(SBINDIR) + $(E) " INSTALL " scripts/criu-keygen + $(Q) install -m 755 scripts/criu-keygen $(DESTDIR)$(SBINDIR) .PHONY: install uninstall: $(E) " UNINSTALL" criu $(Q) $(RM) $(addprefix $(DESTDIR)$(SBINDIR)/,criu) $(Q) $(RM) $(addprefix $(DESTDIR)$(SBINDIR)/,criu-ns) + $(Q) $(RM) $(addprefix $(DESTDIR)$(SBINDIR)/,criu-keygen) $(Q) $(RM) $(addprefix $(DESTDIR)$(INCLUDEDIR)/criu/,$(notdir $(UAPI_HEADERS))) $(Q) $(RM) $(addprefix $(DESTDIR)$(LIBEXECDIR)/criu/scripts/,systemd-autofs-restart.sh) .PHONY: uninstall diff --git a/scripts/criu-keygen b/scripts/criu-keygen new file mode 100755 index 0000000000..6124f363a2 --- /dev/null +++ b/scripts/criu-keygen @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 +import os +import argparse +import datetime +import pathlib + +from cryptography import x509 +from cryptography.hazmat.primitives import hashes +from cryptography.hazmat.primitives import serialization +from cryptography.hazmat.primitives.asymmetric import rsa +from cryptography.hazmat.primitives.asymmetric import ec
+
+CRIU_PKI_PATH = "/etc/pki/criu"
+CRIU_KEY_PATH = "/etc/pki/criu/private"
+
+
+def generate_certificate(private_key):
+    """
+    Build a self-signed x509 certificate for private_key; prompt for where to save both.
+    """
+    key_path = input("Enter file in which to save the key ({}): ".format(
+        os.path.join(CRIU_KEY_PATH, "key.pem")
+    ))
+    if not key_path:
+        key_path = os.path.join(CRIU_KEY_PATH, "key.pem")
+
+    cert_path = input("Enter file in which to save the certificate ({}): ".format(
+        os.path.join(CRIU_PKI_PATH, "cert.pem")
+    ))
+    if not cert_path:
+        cert_path = os.path.join(CRIU_PKI_PATH, "cert.pem")
+
+    subject = issuer = x509.Name([])
+    cert = (
+        x509.CertificateBuilder()
+        .subject_name(subject)
+        .issuer_name(issuer)
+        .public_key(private_key.public_key())
+        .serial_number(x509.random_serial_number())
+        .not_valid_before(datetime.datetime.now(datetime.timezone.utc))
+        .not_valid_after(datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(days=10))
+        .sign(private_key, hashes.SHA256())
+    )
+
+    for path in (key_path, cert_path):  # also covers user-supplied locations
+        pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True)
+
+    os.umask(0o277)  # a newly created key file gets mode 0400
+    with open(key_path, "wb") as f:
+        f.write(private_key.private_bytes(
+            encoding=serialization.Encoding.PEM,
+            format=serialization.PrivateFormat.TraditionalOpenSSL,
+            encryption_algorithm=serialization.NoEncryption(),
+        ))
+
+    os.umask(0o222)  # a newly created certificate file gets mode 0444
+    with open(cert_path, "wb") as f:
+        f.write(cert.public_bytes(serialization.Encoding.PEM))
+
+
+def generate_ec_key(key_size):
+    """Create a self-signed certificate with an EC key (256, 384 or 521 bits; default 256)."""
+    # Curve classes take no arguments, so the size selects the class instead.
+    curves = {256: ec.SECP256R1, 384: ec.SECP384R1, 521: ec.SECP521R1}
+    curve = curves.get(key_size or 256)
+    if curve is None:
+        raise SystemExit("Unsupported EC key size: {}".format(key_size))
+    print("Generating public/private ec key pair.")
+    private_key = ec.generate_private_key(curve())
+    generate_certificate(private_key)
+
+
+def generate_rsa_key(key_size):
+    """Create a self-signed certificate with an RSA key (default 2048 bits)."""
+    if not key_size:
+        key_size = 2048
+    print("Generating public/private rsa key pair.")
+    private_key = rsa.generate_private_key(public_exponent=65537, key_size=key_size)
+    generate_certificate(private_key)
+
+
+def main():
+    """Parse the command line and generate a key pair of the requested type."""
+    parser = argparse.ArgumentParser(description='Generate X.509 certificates and private keys for CRIU.')
+    parser.add_argument('-t', '--type', choices=['rsa', 'ec'], default='rsa', help='Type of key to create (default: rsa)')
+    parser.add_argument('--bits', type=int, help='Number of bits in the key')
+    args = parser.parse_args()
+
+    if args.type == 'rsa':
+        generate_rsa_key(args.bits)
+    elif args.type == 'ec':
+        generate_ec_key(args.bits)
+
+
+if __name__ == "__main__":
+    main()