From 4b111ddf998d27de3cdc76694a1b7d6a8c9c1a4d Mon Sep 17 00:00:00 2001 From: Colin Walters Date: Wed, 20 Nov 2024 16:47:02 -0500 Subject: [PATCH] =?UTF-8?q?install:=20Add=20`ensure-completion`=20verb,=20?= =?UTF-8?q?wire=20up=20ostree-deploy=20=E2=86=92=20bootc?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When bootc was created, it started to become a superset of ostree; in particular things like `/usr/lib/bootc/kargs.d` and logically bound images. However...Anaconda today is still invoking `ostree container image deploy`. Main fix -------- When bootc takes over the `/usr/libexec/ostree/ext/ostree-container` entrypoint, make the existing `ostree container image deploy` CLI actually just call back into bootc to fix things up. No additional work required other than getting an updated bootc in the Anaconda ISO. Old Anaconda ISOs ----------------- But, a further problem here is that Anaconda is only updated once per OS major+minor - e.g. there won't be an update to it for the lifetime of RHEL 9.5 or Fedora 41. We want the ability to ship new features and bugfixes in those OSes (especially RHEL 9.5). So given that we have a newer bootc in the target container, we can do this: ``` %post --erroronfail bootc install ensure-completion %end ``` And that will fix things up. Of course there's fun $details here...the way Anaconda implements `%post` is via a hand-augmented `chroot` i.e. a degenerate container, and we need to escape that and fix some things up (such as a missing cgroupfs mount). Summary ------- - With a newer bootc in the ISO, everything just works - For older ISOs, one can add the `%post` above as a workaround. Implementation details: Cross-linking bootc and ostree-rs-ext ------------------------------------------------------------- This whole thing is very confusing because now, the linkage between bootc and ostree-rs-ext is bidirectional. 
In the case of `bootc install to-filesystem`, we end up calling into ostree-rs-ext, and we *must not* recurse back into bootc, because at least for kernel arguments we might end up applying them *twice*. We do this by passing a CLI argument. The second problem is the crate-level dependency; right now they're independent crates so we can't have ostree-rs-ext actually call into bootc directly, as convenient as that would be. So we end up forking ourselves as a subprocess. But that's not too bad because we need to carry a subprocess-based entrypoint *anyways* for the Anaconda `%post` case. Implementation details: /etc/resolv.conf ---------------------------------------- There's some surprising stuff going on in how Anaconda handles `/etc/resolv.conf` in the target root that I got burned by. In Fedora it's trying to query if systemd-resolved is enabled in the target or something? I ended up writing some code to just try to paper over this to ensure we have networking in the `%post` where we need it to fetch LBIs. Signed-off-by: Colin Walters --- lib/src/boundimage.rs | 24 ++- lib/src/cli.rs | 29 +++ lib/src/install.rs | 5 +- lib/src/install/completion.rs | 298 +++++++++++++++++++++++++++++ ostree-ext/src/container/deploy.rs | 31 +++ ostree-ext/src/globals.rs | 5 +- 6 files changed, 380 insertions(+), 12 deletions(-) create mode 100644 lib/src/install/completion.rs diff --git a/lib/src/boundimage.rs b/lib/src/boundimage.rs index bc78d538a..0e85b36d7 100644 --- a/lib/src/boundimage.rs +++ b/lib/src/boundimage.rs @@ -5,8 +5,6 @@ //! pre-pulled (and in the future, pinned) before a new image root //! is considered ready. 
-use std::num::NonZeroUsize; - use anyhow::{Context, Result}; use camino::Utf8Path; use cap_std_ext::cap_std::fs::Dir; @@ -49,7 +47,7 @@ pub(crate) async fn pull_bound_images(sysroot: &Storage, deployment: &Deployment #[context("Querying bound images")] pub(crate) fn query_bound_images_for_deployment( - sysroot: &Storage, + sysroot: &ostree_ext::ostree::Sysroot, deployment: &Deployment, ) -> Result> { let deployment_root = &crate::utils::deployment_fd(sysroot, deployment)?; @@ -153,15 +151,21 @@ pub(crate) async fn pull_images( sysroot: &Storage, bound_images: Vec, ) -> Result<()> { - tracing::debug!("Pulling bound images: {}", bound_images.len()); - // Yes, the usage of NonZeroUsize here is...maybe odd looking, but I find - // it an elegant way to divide (empty vector, non empty vector) since - // we want to print the length too below. - let Some(n) = NonZeroUsize::new(bound_images.len()) else { - return Ok(()); - }; // Only do work like initializing the image storage if we have images to pull. + if bound_images.is_empty() { + return Ok(()); + } let imgstore = sysroot.get_ensure_imgstore()?; + pull_images_impl(imgstore, bound_images).await +} + +#[context("Pulling bound images")] +pub(crate) async fn pull_images_impl( + imgstore: &crate::imgstorage::Storage, + bound_images: Vec, +) -> Result<()> { + let n = bound_images.len(); + tracing::debug!("Pulling bound images: {n}"); // TODO: do this in parallel for bound_image in bound_images { let image = &bound_image.image; diff --git a/lib/src/cli.rs b/lib/src/cli.rs index 7efdda2ef..12502f6ae 100644 --- a/lib/src/cli.rs +++ b/lib/src/cli.rs @@ -182,6 +182,24 @@ pub(crate) enum InstallOpts { /// will be wiped, but the content of the existing root will otherwise be retained, and will /// need to be cleaned up if desired when rebooted into the new root. ToExistingRoot(crate::install::InstallToExistingRootOpts), + /// Intended for use in environments that are performing an ostree-based installation, not bootc. 
+ /// + /// In this scenario the installation may be missing bootc specific features such as + /// kernel arguments, logically bound images and more. This command can be used to attempt + /// to reconcile. At the current time, the only tested environment is Anaconda using `ostreecontainer` + /// and it is recommended to avoid usage outside of that environment. Instead, ensure your + /// code is using `bootc install to-filesystem` from the start. + #[clap(hide = true)] + EnsureCompletion { + /// When provided, we assume that we're being invoked from our own + /// ostree-ext codebase. + #[clap(long)] + sysroot: Option, + + /// Must be set if sysroot is set + #[clap(long)] + stateroot: Option, + }, /// Output JSON to stdout that contains the merged installation configuration /// as it may be relevant to calling processes using `install to-filesystem` /// that in particular want to discover the desired root filesystem type from the container image. @@ -989,6 +1007,17 @@ async fn run_from_opt(opt: Opt) -> Result<()> { crate::install::install_to_existing_root(opts).await } InstallOpts::PrintConfiguration => crate::install::print_configuration(), + InstallOpts::EnsureCompletion { sysroot, stateroot } => { + let rootfs = &Dir::open_ambient_dir("/", cap_std::ambient_authority())?; + if let Some(sysroot) = sysroot { + let stateroot = stateroot.as_deref().ok_or_else(|| { + anyhow::anyhow!("Expected stateroot when --sysroot is set") + })?; + crate::install::completion::run_from_ostree(rootfs, &sysroot, stateroot).await + } else { + crate::install::completion::run_from_anaconda(rootfs).await + } + } }, #[cfg(feature = "install")] Opt::ExecInHostMountNamespace { args } => { diff --git a/lib/src/install.rs b/lib/src/install.rs index 98857edf3..7a4edf4bd 100644 --- a/lib/src/install.rs +++ b/lib/src/install.rs @@ -7,6 +7,7 @@ // This sub-module is the "basic" installer that handles creating basic block device // and filesystem setup. 
pub(crate) mod baseline; +pub(crate) mod completion; pub(crate) mod config; mod osbuild; pub(crate) mod osconfig; @@ -762,6 +763,7 @@ async fn install_container( )?; let kargsd = kargsd.iter().map(|s| s.as_str()); + // Keep this in sync with install/completion.rs for the Anaconda fixups let install_config_kargs = state .install_config .as_ref() @@ -786,6 +788,7 @@ async fn install_container( options.kargs = Some(kargs.as_slice()); options.target_imgref = Some(&state.target_imgref); options.proxy_cfg = proxy_cfg; + options.skip_completion = true; // Must be set to avoid recursion! options.no_clean = has_ostree; let imgstate = crate::utils::async_task_with_spinner( "Deploying container image", @@ -1383,7 +1386,7 @@ async fn install_with_sysroot( } } BoundImages::Unresolved(bound_images) => { - crate::boundimage::pull_images(sysroot, bound_images) + crate::boundimage::pull_images_impl(imgstore, bound_images) .await .context("pulling bound images")?; } diff --git a/lib/src/install/completion.rs b/lib/src/install/completion.rs new file mode 100644 index 000000000..1b318beb8 --- /dev/null +++ b/lib/src/install/completion.rs @@ -0,0 +1,298 @@ +//! This module handles finishing/completion after an ostree-based +//! install from e.g. Anaconda. + +use std::io; +use std::os::fd::AsFd; +use std::process::Command; + +use anyhow::{Context, Result}; +use bootc_utils::CommandRunExt; +use camino::Utf8Path; +use cap_std_ext::{cap_std::fs::Dir, dirext::CapStdExtDirExt}; +use fn_error_context::context; +use ostree_ext::{gio, ostree}; +use rustix::fs::Mode; +use rustix::fs::OFlags; + +use super::config; +use crate::utils::medium_visibility_warning; + +/// An environment variable set by anaconda that hints +/// we are running as part of that environment. 
+const ANACONDA_ENV_HINT: &str = "ANA_INSTALL_PATH"; +/// Global flag to signal we're in a booted ostree system +const OSTREE_BOOTED: &str = "run/ostree-booted"; +/// The very well-known DNS resolution file +const RESOLVCONF: &str = "etc/resolv.conf"; +/// A renamed file +const RESOLVCONF_ORIG: &str = "etc/resolv.conf.bootc-original"; +/// The root filesystem for pid 1 +const PROC1_ROOT: &str = "proc/1/root"; +/// The cgroupfs mount point, which we may propagate from the host if needed +const CGROUPFS: &str = "sys/fs/cgroup"; +/// The path to the temporary global ostree pull secret +const RUN_OSTREE_AUTH: &str = "run/ostree/auth.json"; +/// A sub path of /run which is used to ensure idempotency +pub(crate) const RUN_BOOTC_INSTALL_RECONCILED: &str = "run/bootc-install-reconciled"; + +/// Assuming that the current root is an ostree deployment, pull kargs +/// from it and inject them. +fn reconcile_kargs(sysroot: &ostree::Sysroot, deployment: &ostree::Deployment) -> Result<()> { + let deployment_root = &crate::utils::deployment_fd(sysroot, deployment)?; + let cancellable = gio::Cancellable::NONE; + + let current_kargs = deployment + .bootconfig() + .expect("bootconfig for deployment") + .get("options"); + let current_kargs = current_kargs + .as_ref() + .map(|s| s.as_str()) + .unwrap_or_default(); + tracing::debug!("current_kargs={current_kargs}"); + let current_kargs = ostree::KernelArgs::from_string(¤t_kargs); + + // Keep this in sync with install_container + let install_config = config::load_config()?; + let install_config_kargs = install_config + .as_ref() + .and_then(|c| c.kargs.as_ref()) + .into_iter() + .flatten() + .map(|s| s.as_str()) + .collect::>(); + let kargsd = crate::kargs::get_kargs_in_root(deployment_root, std::env::consts::ARCH)?; + let kargsd = kargsd.iter().map(|s| s.as_str()).collect::>(); + + current_kargs.append_argv(&install_config_kargs); + current_kargs.append_argv(&kargsd); + let new_kargs = current_kargs.to_string(); + 
tracing::debug!("new_kargs={new_kargs}"); + + sysroot.deployment_set_kargs_in_place(deployment, Some(&new_kargs), cancellable)?; + Ok(()) +} + +/// A little helper struct which on drop renames a file. Used for putting back /etc/resolv.conf. +#[must_use] +struct Renamer<'d> { + dir: &'d Dir, + from: &'static Utf8Path, + to: &'static Utf8Path, +} + +impl<'d> Renamer<'d> { + fn _impl_drop(&mut self) -> Result<()> { + self.dir + .rename(self.from, self.dir, self.to) + .map_err(Into::into) + } + + fn consume(mut self) -> Result<()> { + self._impl_drop() + } +} + +impl<'d> Drop for Renamer<'d> { + fn drop(&mut self) { + let _ = self._impl_drop(); + } +} +/// Work around https://github.com/containers/buildah/issues/4242#issuecomment-2492480586 +/// among other things. We unconditionally replace the contents of `/etc/resolv.conf` +/// in the target root with whatever the host uses (in Fedora 41+, that's systemd-resolved for Anaconda). +#[context("Copying host resolv.conf")] +fn ensure_resolvconf<'d>(rootfs: &'d Dir, proc1_root: &Dir) -> Result>> { + // Now check the state of etc/resolv.conf in the target root + let meta = rootfs + .symlink_metadata_optional(RESOLVCONF) + .context("stat")?; + let renamer = if meta.is_some() { + rootfs + .rename(RESOLVCONF, &rootfs, RESOLVCONF_ORIG) + .context("Renaming")?; + Some(Renamer { + dir: &rootfs, + from: RESOLVCONF_ORIG.into(), + to: RESOLVCONF.into(), + }) + } else { + None + }; + // If we got here, /etc/resolv.conf either didn't exist or we removed it. + // Copy the host data into it (note this will follow symlinks; e.g. 
+ // Anaconda in Fedora 41+ defaults to systemd-resolved) + proc1_root + .copy(RESOLVCONF, rootfs, RESOLVCONF) + .context("Copying new resolv.conf")?; + Ok(renamer) +} + +/// Bind a mount point from the host namespace into our root +fn bind_from_host( + rootfs: &Dir, + src: impl AsRef, + target: impl AsRef, +) -> Result<()> { + fn bind_from_host_impl(rootfs: &Dir, src: &Utf8Path, target: &Utf8Path) -> Result<()> { + rootfs.create_dir_all(target)?; + if rootfs.is_mountpoint(target)?.unwrap_or_default() { + return Ok(()); + } + let target = format!("/mnt/sysroot/{target}"); + tracing::debug!("Binding {src} to {target}"); + // We're run in a mount namespace, but not a pid namespace; use nsenter + // via the pid namespace to escape to the host's mount namespace and + // perform a mount there. + Command::new("nsenter") + .args(["-m", "-t", "1", "--", "mount", "--bind"]) + .arg(src) + .arg(&target) + .run()?; + Ok(()) + } + + bind_from_host_impl(rootfs, src.as_ref(), target.as_ref()) +} + +/// Anaconda doesn't mount /sys/fs/cgroup in /mnt/sysroot +#[context("Ensuring cgroupfs")] +fn ensure_cgroupfs(rootfs: &Dir) -> Result<()> { + bind_from_host(rootfs, CGROUPFS, CGROUPFS) +} + +/// If we have /etc/ostree/auth.json in the Anaconda environment then propagate +/// it into /run/ostree/auth.json +#[context("Propagating ostree auth")] +fn ensure_ostree_auth(rootfs: &Dir, host_root: &Dir) -> Result<()> { + let Some((authpath, authfd)) = + ostree_ext::globals::get_global_authfile(&host_root).context("Querying authfiles")? 
+ else { + tracing::debug!("No auth found in host"); + return Ok(()); + }; + tracing::debug!("Discovered auth in host: {authpath}"); + let mut authfd = io::BufReader::new(authfd); + let run_ostree_auth = Utf8Path::new(RUN_OSTREE_AUTH); + rootfs.create_dir_all(run_ostree_auth.parent().unwrap())?; + rootfs.atomic_replace_with(run_ostree_auth, |w| std::io::copy(&mut authfd, w))?; + Ok(()) +} + +#[context("Opening {PROC1_ROOT}")] +fn open_proc1_root(rootfs: &Dir) -> Result { + let proc1_root = rustix::fs::openat( + &rootfs.as_fd(), + PROC1_ROOT, + OFlags::CLOEXEC | OFlags::DIRECTORY, + Mode::empty(), + )?; + Dir::reopen_dir(&proc1_root.as_fd()).map_err(Into::into) +} + +/// Core entrypoint invoked when we are likely being invoked from inside Anaconda as a `%post`. +pub(crate) async fn run_from_anaconda(rootfs: &Dir) -> Result<()> { + // unshare our mount namespace, so any *further* mounts aren't leaked. + // Note that because this does a re-exec, anything *before* this point + // should be idempotent. + crate::cli::ensure_self_unshared_mount_namespace()?; + + if std::env::var_os(ANACONDA_ENV_HINT).is_none() { + // Be loud if a user is invoking this outside of the expected setup. + medium_visibility_warning(&format!("Missing environment variable {ANACONDA_ENV_HINT}")); + } else { + // In the way Anaconda sets up the bind mounts today, this doesn't exist. Later + // code expects it to exist, so do so. + if !rootfs.try_exists(OSTREE_BOOTED)? { + tracing::debug!("Writing {OSTREE_BOOTED}"); + rootfs.atomic_write(OSTREE_BOOTED, b"")?; + } + } + + // Get access to the real root by opening /proc/1/root + let proc1_root = &open_proc1_root(rootfs)?; + + if proc1_root + .try_exists(RUN_BOOTC_INSTALL_RECONCILED) + .context("Querying reconciliation")? + { + println!("Reconciliation already completed."); + return Ok(()); + } + + ensure_cgroupfs(rootfs)?; + // Sometimes Anaconda may not initialize networking in the target root? 
+ let resolvconf = ensure_resolvconf(rootfs, proc1_root)?; + // Propagate an injected authfile for pulling logically bound images + ensure_ostree_auth(rootfs, proc1_root)?; + + let sysroot = ostree::Sysroot::new(Some(&gio::File::for_path("/"))); + sysroot + .load(gio::Cancellable::NONE) + .context("Loading sysroot")?; + impl_completion(rootfs, &sysroot, None).await?; + + proc1_root + .write(RUN_BOOTC_INSTALL_RECONCILED, b"") + .with_context(|| format!("Writing {RUN_BOOTC_INSTALL_RECONCILED}"))?; + if let Some(resolvconf) = resolvconf { + resolvconf.consume()?; + } + Ok(()) +} + +/// From ostree-rs-ext, run through the rest of bootc install functionality +pub async fn run_from_ostree(rootfs: &Dir, sysroot: &Utf8Path, stateroot: &str) -> Result<()> { + // Load sysroot from the provided path + let sysroot = ostree::Sysroot::new(Some(&gio::File::for_path(sysroot))); + sysroot.load(gio::Cancellable::NONE)?; + + impl_completion(rootfs, &sysroot, Some(stateroot)).await?; + + // In this case we write the completion directly to /run as we're running from + // the host context. + rootfs + .write(RUN_BOOTC_INSTALL_RECONCILED, b"") + .with_context(|| format!("Writing {RUN_BOOTC_INSTALL_RECONCILED}"))?; + Ok(()) +} + +/// Core entrypoint for completion of an ostree-based install to a bootc one: +/// +/// - kernel argument handling +/// - logically bound images +/// +/// We could also do other things here, such as write an aleph file or +/// ensure the repo config is synchronized, but these two are the most important +/// for now. 
+pub(crate) async fn impl_completion( + rootfs: &Dir, + sysroot: &ostree::Sysroot, + stateroot: Option<&str>, +) -> Result<()> { + let deployment = &sysroot + .merge_deployment(stateroot) + .ok_or_else(|| anyhow::anyhow!("Failed to find deployment (stateroot={stateroot:?}"))?; + let sysroot_dir = Dir::reopen_dir(&crate::utils::sysroot_fd(&sysroot))?; + + // Create a subdir in /run + let rundir = "run/bootc-install"; + rootfs.create_dir_all(rundir)?; + let rundir = &rootfs.open_dir(rundir)?; + + // ostree-ext doesn't do kargs, so handle that now + reconcile_kargs(&sysroot, deployment)?; + + // ostree-ext doesn't do logically bound images + let bound_images = crate::boundimage::query_bound_images_for_deployment(sysroot, deployment)?; + if !bound_images.is_empty() { + // When we're run through ostree, we only lazily initialize the podman storage to avoid + // having a hard dependency on it. + let imgstorage = &crate::imgstorage::Storage::create(&sysroot_dir, &rundir)?; + crate::boundimage::pull_images_impl(imgstorage, bound_images) + .await + .context("pulling bound images")?; + } + + Ok(()) +} diff --git a/ostree-ext/src/container/deploy.rs b/ostree-ext/src/container/deploy.rs index 4d0ec1bf4..e1698be9c 100644 --- a/ostree-ext/src/container/deploy.rs +++ b/ostree-ext/src/container/deploy.rs @@ -1,9 +1,13 @@ //! Perform initial setup for a container image based system root use std::collections::HashSet; +use std::os::fd::BorrowedFd; +use std::process::Command; use anyhow::Result; +use cap_std_ext::cmdext::CapStdExtCommandExt; use fn_error_context::context; +use ocidir::cap_std::fs::Dir; use ostree::glib; use super::store::{gc_image_layers, LayeredImageState}; @@ -44,10 +48,19 @@ pub struct DeployOpts<'a> { /// it will not be necessary to remove the previous image. 
pub no_imgref: bool, + /// Do not invoke bootc completion + pub skip_completion: bool, + /// Do not cleanup deployments pub no_clean: bool, } +// Access the file descriptor for a sysroot +#[allow(unsafe_code)] +pub(crate) fn sysroot_fd(sysroot: &ostree::Sysroot) -> BorrowedFd { + unsafe { BorrowedFd::borrow_raw(sysroot.fd()) } +} + /// Write a container image to an OSTree deployment. /// /// This API is currently intended for only an initial deployment. @@ -58,6 +71,7 @@ pub async fn deploy( imgref: &OstreeImageReference, options: Option>, ) -> Result> { + let sysroot_dir = &Dir::reopen_dir(&sysroot_fd(sysroot))?; let cancellable = ostree::gio::Cancellable::NONE; let options = options.unwrap_or_default(); let repo = &sysroot.repo(); @@ -122,6 +136,23 @@ pub async fn deploy( flags, cancellable, )?; + + // We end up re-executing ourselves as a subprocess because + // otherwise right now we end up with a circular dependency between + // crates. We need an option to skip though so when the *main* + // bootc install code calls this API, we don't do this as it + // will have already been handled. + if !options.skip_completion { + let st = Command::new("bootc") + .args(["install", "ensure-completion", "--sysroot=."]) + .arg(format!("--stateroot={stateroot}")) + .cwd_dir(sysroot_dir.try_clone()?) + .status()?; + if !st.success() { + anyhow::bail!("Failed to complete bootc install"); + } + } + if !options.no_clean { sysroot.cleanup(cancellable)?; } diff --git a/ostree-ext/src/globals.rs b/ostree-ext/src/globals.rs index ce9c53cab..6960d5d0c 100644 --- a/ostree-ext/src/globals.rs +++ b/ostree-ext/src/globals.rs @@ -87,7 +87,10 @@ pub fn get_global_authfile(root: &Dir) -> Result> { } /// Return the path to the global container authentication file, if it exists. 
-fn get_global_authfile_impl(root: &RootDir, am_uid0: bool) -> Result> { +pub fn get_global_authfile_impl( + root: &RootDir, + am_uid0: bool, +) -> Result> { let paths = get_config_paths(am_uid0); paths.open_file(root, "auth.json") }