From 48f5ced2f8f6e3e8e5d9fcb412a1bf20f6c89ca3 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Wed, 18 Dec 2024 15:57:37 +0800 Subject: [PATCH 1/4] prepare-root: Bind var under /run instead of inplace --- src/libostree/ostree-impl-system-generator.c | 46 +------------------- src/switchroot/ostree-prepare-root.c | 41 ++++++++--------- 2 files changed, 20 insertions(+), 67 deletions(-) diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c index 6968c73884..3fe1d67dbd 100644 --- a/src/libostree/ostree-impl-system-generator.c +++ b/src/libostree/ostree-impl-system-generator.c @@ -126,34 +126,6 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha #endif } -// Resolve symlink to return osname -static gboolean -_ostree_sysroot_parse_bootlink_aboot (const char *bootlink, char **out_osname, GError **error) -{ - static gsize regex_initialized; - static GRegex *regex; - g_autofree char *symlink_val = glnx_readlinkat_malloc (-1, bootlink, NULL, error); - if (!symlink_val) - return glnx_prefix_error (error, "Failed to read '%s' symlink", bootlink); - - if (g_once_init_enter (®ex_initialized)) - { - regex = g_regex_new ("^deploy/([^/]+)/", 0, 0, NULL); - g_assert (regex); - g_once_init_leave (®ex_initialized, 1); - } - - g_autoptr (GMatchInfo) match = NULL; - if (!g_regex_match (regex, symlink_val, 0, &match)) - return glnx_throw (error, - "Invalid aboot symlink in /ostree, expected symlink to resolve to " - "deploy/OSNAME/... instead it resolves to '%s'", - symlink_val); - - *out_osname = g_match_info_fetch (match, 1); - return TRUE; -} - /* Generate var.mount */ static gboolean fstab_generator (const char *ostree_target, const bool is_aboot, const char *normal_dir, @@ -166,20 +138,6 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor static const char fstab_path[] = "/etc/fstab"; static const char var_path[] = "/var"; - /* Written by ostree-sysroot-deploy.c. We parse out the stateroot here since we - * need to know it to mount /var. Unfortunately we can't easily use the - * libostree API to find the booted deployment since /boot might not have been - * mounted yet. - */ - g_autofree char *stateroot = NULL; - if (is_aboot) - { - if (!_ostree_sysroot_parse_bootlink_aboot (ostree_target, &stateroot, error)) - return glnx_prefix_error (error, "Parsing aboot stateroot"); - } - else if (!_ostree_sysroot_parse_bootlink (ostree_target, NULL, &stateroot, NULL, NULL, error)) - return glnx_prefix_error (error, "Parsing stateroot"); - /* Load /etc/fstab if it exists, and look for a /var mount */ g_autoptr (OtLibMountFile) fstab = setmntent (fstab_path, "re"); gboolean found_var_mnt = FALSE; @@ -219,7 +177,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor return FALSE; /* Generate our bind mount unit */ - const char *stateroot_var_path = glnx_strjoina ("/sysroot/ostree/deploy/", stateroot, "/var"); + const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var"; g_auto (GLnxTmpfile) tmpf = { 0, @@ -253,7 +211,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor "Where=%s\n" "What=%s\n" "Options=bind,slave,shared\n", - var_path, stateroot_var_path)) + var_path, var_dir)) return FALSE; if (!g_output_stream_flush (outstream, cancellable, error)) return FALSE; diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 8e161be76b..e2123f5cf1 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -389,19 +389,16 @@ main (int argc, char *argv[]) g_autofree char *expected_digest = NULL; - // For now we just stick the transient root on the default /run tmpfs; - // however, see - // https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36 - // which bumps up these defaults for the rootfs a bit. - g_autofree char *root_upperdir - = root_transient ? g_build_filename (OTCORE_RUN_OSTREE_PRIVATE, "root/upper", NULL) - : NULL; - g_autofree char *root_workdir - = root_transient ? g_build_filename (OTCORE_RUN_OSTREE_PRIVATE, "root/work", NULL) : NULL; - // Propagate these options for transient root, if provided if (root_transient) { + // For now we just stick the transient root on the default /run tmpfs; + // however, see + // https://github.com/systemd/systemd/blob/604b2001081adcbd64ee1fbe7de7a6d77c5209fe/src/basic/mountpoint-util.h#L36 + // which bumps up these defaults for the rootfs a bit. + const char *root_upperdir = OTCORE_RUN_OSTREE_PRIVATE "/root/upper"; + const char *root_workdir = OTCORE_RUN_OSTREE_PRIVATE "/root/work"; + if (!glnx_shutil_mkdir_p_at (AT_FDCWD, root_upperdir, 0755, NULL, &error)) errx (EXIT_FAILURE, "Failed to create %s: %s", root_upperdir, error->message); if (!glnx_shutil_mkdir_p_at (AT_FDCWD, root_workdir, 0700, NULL, &error)) @@ -611,17 +608,15 @@ main (int argc, char *argv[]) err (EXIT_FAILURE, "failed to bind mount (class:readonly) /usr"); } - /* Prepare /var. - * When a read-only sysroot is configured, this adds a dedicated bind-mount (to itself) - * so that the stateroot location stays writable. */ - if (sysroot_readonly) - { - /* Bind-mount /var (at stateroot path), and remount as writable. */ - if (mount ("../../var", "../../var", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to prepare /var bind-mount at %s", srcpath); - if (mount ("../../var", "../../var", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", srcpath); - } + const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var"; + + /* Bind-mount /var, and remount as writable. */ + if (mkdirat (AT_FDCWD, var_dir, 0) < 0) + err (EXIT_FAILURE, "failed to mkdir %s", var_dir); + if (mount ("../../var", var_dir, NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to prepare /var bind-mount at %s", var_dir); + if (mount (var_dir, var_dir, NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", var_dir); /* When running under systemd, /var will be handled by a 'var.mount' unit outside * of initramfs. @@ -640,8 +635,8 @@ main (int argc, char *argv[]) */ if (mount_var) { - if (mount ("../../var", TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to bind mount ../../var to var"); + if (mount (var_dir, TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to bind mount /var"); /* To avoid having submounts of /var propagate into $stateroot/var, the * mount is made with slave+shared propagation. See the comment in From d25cd570520b0439fc548df81505d8048c5541ab Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 19 Dec 2024 21:16:15 +0800 Subject: [PATCH 2/4] prepare-root: Unmount temporary var mount after /var is mounted --- src/libostree/ostree-impl-system-generator.c | 128 +++++++++++-------- src/switchroot/ostree-prepare-root.c | 5 +- 2 files changed, 77 insertions(+), 56 deletions(-) diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c index 3fe1d67dbd..dc502b28fe 100644 --- a/src/libostree/ostree-impl-system-generator.c +++ b/src/libostree/ostree-impl-system-generator.c @@ -126,6 +126,35 @@ require_internal_units (const char *normal_dir, const char *early_dir, const cha #endif } +static gboolean +write_unit_file (int dir_fd, const char *path, GCancellable *cancellable, GError **error, const char *fmt, ...) +{ + g_auto (GLnxTmpfile) tmpf = { + 0, + }; + if (!glnx_open_tmpfile_linkable_at (dir_fd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error)) + return FALSE; + g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE); + gsize bytes_written; + va_list args; + va_start (args, fmt); + const gboolean r = g_output_stream_vprintf (outstream, &bytes_written, cancellable, error, fmt, args); + va_end (args); + if (!r) + return FALSE; + if (!g_output_stream_flush (outstream, cancellable, error)) + return FALSE; + g_clear_object (&outstream); + /* It should be readable */ + if (!glnx_fchmod (tmpf.fd, 0644, error)) + return FALSE; + /* Error out if somehow it already exists, that'll help us debug conflicts */ + if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, dir_fd, path, + error)) + return FALSE; + return TRUE; +} + /* Generate var.mount */ static gboolean fstab_generator (const char *ostree_target, const bool is_aboot, const char *normal_dir, @@ -135,8 +164,37 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor /* Not currently cancellable, but define a var in case we care later */ GCancellable *cancellable = NULL; /* Some path constants to avoid typos */ - static const char fstab_path[] = "/etc/fstab"; - static const char var_path[] = "/var"; + const char *fstab_path = "/etc/fstab"; + const char *var_dst = "/var"; + const char *var_src = OTCORE_RUN_OSTREE_PRIVATE "/var"; + + /* Prepare to write to the output unit dir; we use the "normal" dir + * that overrides /usr, but not /etc. + */ + glnx_autofd int normal_dir_dfd = -1; + if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error)) + return FALSE; + + /* Generate a unit to unmount var_src */ + if (!write_unit_file (normal_dir_dfd, "ostree-unmount-temp-var.service", cancellable, error, + "##\n# Automatically generated by ostree-system-generator\n##\n\n" + "[Unit]\n" + "Documentation=man:ostree(1)\n" + "ConditionPathIsMountPoint=%s\n" + "After=var.mount\n" + "\n" + "[Service]\n" + "Type=oneshot\n" + "ExecStart=/usr/bin/umount --lazy %s\n", + var_src, var_src)) + return FALSE; + + if (!glnx_shutil_mkdir_p_at (normal_dir_dfd, "local-fs.target.wants", 0755, cancellable, + error)) + return FALSE; + if (symlinkat ("../ostree-unmount-temp-var.service", normal_dir_dfd, + "local-fs.target.wants/ostree-unmount-temp-var.service") < 0) + return glnx_throw_errno_prefix (error, "symlinkat"); /* Load /etc/fstab if it exists, and look for a /var mount */ g_autoptr (OtLibMountFile) fstab = setmntent (fstab_path, "re"); @@ -157,7 +215,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor path_kill_slashes (where); /* We're only looking for /var here */ - if (strcmp (where, var_path) != 0) + if (strcmp (where, var_dst) != 0) continue; found_var_mnt = TRUE; @@ -169,59 +227,19 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor if (found_var_mnt) return TRUE; - /* Prepare to write to the output unit dir; we use the "normal" dir - * that overrides /usr, but not /etc. - */ - glnx_autofd int normal_dir_dfd = -1; - if (!glnx_opendirat (AT_FDCWD, normal_dir, TRUE, &normal_dir_dfd, error)) - return FALSE; - /* Generate our bind mount unit */ - const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var"; - - g_auto (GLnxTmpfile) tmpf = { - 0, - }; - if (!glnx_open_tmpfile_linkable_at (normal_dir_dfd, ".", O_WRONLY | O_CLOEXEC, &tmpf, error)) - return FALSE; - g_autoptr (GOutputStream) outstream = g_unix_output_stream_new (tmpf.fd, FALSE); - gsize bytes_written; - /* This code is inspired by systemd's fstab-generator.c. - * - * Note that our unit doesn't run if systemd.volatile is enabled; - * see https://github.com/ostreedev/ostree/pull/856 - * - * To avoid having submounts of /var propagate into $stateroot/var, the mount - * is made with slave+shared propagation. This means that /var will receive - * mount events from the parent /sysroot mount, but not vice versa. Adding a - * shared peer group below the slave group means that submounts of /var will - * inherit normal shared propagation. See mount_namespaces(7), Linux - * Documentation/filesystems/sharedsubtree.txt and - * https://github.com/ostreedev/ostree/issues/2086. This also happens in - * ostree-prepare-root.c for the INITRAMFS_MOUNT_VAR case. - */ - if (!g_output_stream_printf (outstream, &bytes_written, cancellable, error, - "##\n# Automatically generated by ostree-system-generator\n##\n\n" - "[Unit]\n" - "Documentation=man:ostree(1)\n" - "ConditionKernelCommandLine=!systemd.volatile\n" - "Before=local-fs.target\n" - "\n" - "[Mount]\n" - "Where=%s\n" - "What=%s\n" - "Options=bind,slave,shared\n", - var_path, var_dir)) - return FALSE; - if (!g_output_stream_flush (outstream, cancellable, error)) - return FALSE; - g_clear_object (&outstream); - /* It should be readable */ - if (!glnx_fchmod (tmpf.fd, 0644, error)) - return FALSE; - /* Error out if somehow it already exists, that'll help us debug conflicts */ - if (!glnx_link_tmpfile_at (&tmpf, GLNX_LINK_TMPFILE_NOREPLACE, normal_dir_dfd, "var.mount", - error)) + if (!write_unit_file (normal_dir_dfd, "var.mount", cancellable, error, + "##\n# Automatically generated by ostree-system-generator\n##\n\n" + "[Unit]\n" + "Documentation=man:ostree(1)\n" + "ConditionKernelCommandLine=!systemd.volatile\n" + "Before=local-fs.target\n" + "\n" + "[Mount]\n" + "Where=%s\n" + "What=%s\n" + "Options=bind,slave,shared\n", + var_dst, var_src)) return FALSE; /* And ensure it's required; newer systemd will auto-inject fs dependencies diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index e2123f5cf1..47a2e6182e 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -636,7 +636,10 @@ main (int argc, char *argv[]) if (mount_var) { if (mount (var_dir, TMP_SYSROOT "/var", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to bind mount /var"); + err (EXIT_FAILURE, "failed to bind mount %s to /var", var_dir); + + if (umount2 (var_dir, MNT_DETACH) < 0) + err (EXIT_FAILURE, "failed to umount %s", var_dir); /* To avoid having submounts of /var propagate into $stateroot/var, the * mount is made with slave+shared propagation. See the comment in From d472e60e03b1ab191c51ecdf3606e16a3e034179 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Fri, 20 Dec 2024 18:55:47 +0800 Subject: [PATCH 3/4] prepare-root: It's not necessary to make /var slave anymore --- src/libostree/ostree-impl-system-generator.c | 2 +- src/switchroot/ostree-prepare-root.c | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/libostree/ostree-impl-system-generator.c b/src/libostree/ostree-impl-system-generator.c index dc502b28fe..6653b826c3 100644 --- a/src/libostree/ostree-impl-system-generator.c +++ b/src/libostree/ostree-impl-system-generator.c @@ -238,7 +238,7 @@ fstab_generator (const char *ostree_target, const bool is_aboot, const char *nor "[Mount]\n" "Where=%s\n" "What=%s\n" - "Options=bind,slave,shared\n", + "Options=bind\n", var_dst, var_src)) return FALSE; diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 47a2e6182e..2f56879d18 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -640,16 +640,6 @@ main (int argc, char *argv[]) if (umount2 (var_dir, MNT_DETACH) < 0) err (EXIT_FAILURE, "failed to umount %s", var_dir); - - /* To avoid having submounts of /var propagate into $stateroot/var, the - * mount is made with slave+shared propagation. See the comment in - * ostree-impl-system-generator.c when /var isn't mounted in the - * initramfs for further explanation. - */ - if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SLAVE | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to change /var to slave mount"); - if (mount (NULL, TMP_SYSROOT "/var", NULL, MS_SHARED | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to change /var to slave+shared mount"); } /* This can be used by other things to signal ostree is in use */ From 71f9146a34f660a0729c43949b1fcbc9e1a3e610 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Fri, 3 Jan 2025 05:44:41 +0800 Subject: [PATCH 4/4] prepare-root: bind mount var under deployment for compatibility --- src/switchroot/ostree-prepare-root.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index 2f56879d18..f39324db60 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -610,13 +610,23 @@ main (int argc, char *argv[]) const char *var_dir = OTCORE_RUN_OSTREE_PRIVATE "/var"; - /* Bind-mount /var, and remount as writable. */ - if (mkdirat (AT_FDCWD, var_dir, 0) < 0) - err (EXIT_FAILURE, "failed to mkdir %s", var_dir); + /* Prepare /var. + * When a read-only sysroot is configured, this adds a dedicated bind-mount (to itself) + * so that the stateroot location stays writable. */ + if (sysroot_readonly) + { + /* Bind-mount /var (at stateroot path), and remount as writable. */ + if (mount ("../../var", "../../var", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to prepare /var bind-mount at %s", srcpath); + if (mount ("../../var", "../../var", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", srcpath); + } + + /* Bind-mount var to var_dir */ if (mount ("../../var", var_dir, NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to prepare /var bind-mount at %s", var_dir); - if (mount (var_dir, var_dir, NULL, MS_BIND | MS_REMOUNT | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to make writable /var bind-mount at %s", var_dir); + err (EXIT_FAILURE, "failed to bind mount %s", var_dir); + if (mount (NULL, var_dir, NULL, MS_SLAVE | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to change %s to slave mount", var_dir); /* When running under systemd, /var will be handled by a 'var.mount' unit outside * of initramfs.