Skip to content

Commit

Permalink
install: Automatically set up /dev and /var/lib/containers
Browse files Browse the repository at this point in the history
We're looking again at the ergonomics of `bootc install to-existing-root`.
This uses the new mount API's support for moving mounts between
mount namespaces to automatically set up `/dev` and `/var/lib/containers`
if they weren't provided to `podman run`, which slightly reduces the
arguments that invocation needs.

Closes: containers#826

Signed-off-by: Colin Walters <[email protected]>
  • Loading branch information
cgwalters committed Nov 25, 2024
1 parent 7c8121a commit 8cc2909
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 11 deletions.
11 changes: 2 additions & 9 deletions lib/src/install.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,8 @@ async fn prepare_install(
tracing::debug!("Target image reference: {target_imgref}");

// A bit of basic global state setup
crate::mount::ensure_mirrored_host_mount("/dev")?;
crate::mount::ensure_mirrored_host_mount("/var/lib/containers")?;
ensure_var()?;
setup_tmp_mounts()?;
// Allocate a temporary directory we can use in various places to avoid
Expand Down Expand Up @@ -1454,12 +1456,6 @@ async fn install_to_filesystem_impl(state: &State, rootfs: &mut RootSetup) -> Re
.ok_or_else(|| anyhow!("No uuid for boot/root"))?;
tracing::debug!("boot uuid={boot_uuid}");

// If we're doing an alongside install, then the /dev bootupd sees needs to be the host's.
ensure!(
crate::mount::is_same_as_host(Utf8Path::new("/dev"))?,
"Missing /dev mount to host /dev"
);

let bound_images = BoundImages::from_state(state).await?;

// Initialize the ostree sysroot (repo, stateroot, etc.)
Expand Down Expand Up @@ -1514,9 +1510,6 @@ pub(crate) async fn install_to_disk(mut opts: InstallToDiskOpts) -> Result<()> {
block_opts.device
);
}
if !crate::mount::is_same_as_host(Utf8Path::new("/dev"))? {
anyhow::bail!("Loopback mounts (--via-loopback) require host devices (-v /dev:/dev)");
}
} else if !target_blockdev_meta.file_type().is_block_device() {
anyhow::bail!("Not a block device: {}", block_opts.device);
}
Expand Down
148 changes: 146 additions & 2 deletions lib/src/mount.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
//! Helpers for interacting with mountpoints

use std::process::Command;
use std::{
fs,
os::fd::{AsFd, OwnedFd},
process::Command,
};

use anyhow::{anyhow, Result};
use anyhow::{anyhow, Context, Result};
use bootc_utils::CommandRunExt;
use camino::Utf8Path;
use fn_error_context::context;
use rustix::{
mount::{MoveMountFlags, OpenTreeFlags},
net::{
AddressFamily, RecvFlags, SendAncillaryBuffer, SendAncillaryMessage, SendFlags,
SocketFlags, SocketType,
},
process::WaitOptions,
thread::Pid,
};
use serde::Deserialize;

use crate::task::Task;
Expand Down Expand Up @@ -124,3 +137,134 @@ pub(crate) fn is_same_as_host(path: &Utf8Path) -> Result<bool> {
);
Ok(devstat.f_fsid == hostdevstat.f_fsid)
}

/// Given a pid, enter its mount namespace and acquire a file descriptor
/// for a mount from that namespace.
///
/// This forks a short-lived helper process. The helper joins the mount
/// namespace of `pid` via `/proc/<pid>/ns/mnt`, calls `open_tree` on `path`
/// with `OPEN_TREE_CLONE` (plus `AT_RECURSIVE` when `recursive` is set), and
/// passes the resulting file descriptor back over a Unix socket pair using
/// `SCM_RIGHTS`.
///
/// # Errors
///
/// Returns an error if the socket pair cannot be created, if `fork` fails,
/// if the helper exits without sending a file descriptor (its own error is
/// printed to stderr), or if waiting on the helper fails.
#[allow(unsafe_code)]
#[context("Opening mount tree from pid")]
pub(crate) fn open_tree_from_pidns(
    pid: rustix::process::Pid,
    path: &Utf8Path,
    recursive: bool,
) -> Result<OwnedFd> {
    // Allocate a socket pair to use for sending file descriptors.
    let (sock_parent, sock_child) = rustix::net::socketpair(
        AddressFamily::UNIX,
        SocketType::STREAM,
        SocketFlags::CLOEXEC,
        None,
    )
    .context("socketpair")?;
    // One byte of regular payload accompanies the ancillary fd message.
    const DUMMY_DATA: &[u8] = &[b'!'];
    // SAFETY: The child only runs the helper closure below and then exits;
    // it never returns into the caller's code.
    match unsafe { libc::fork() } {
        0 => {
            // We're in the child. At this point we know we don't have multiple threads, so we
            // can safely `setns`.

            // The child only ever writes; release the parent's end.
            drop(sock_parent);

            // All fallible work lives in this closure so that an error can never
            // propagate with `?` out of this function: that would leave a second
            // copy of the calling process running the caller's code.
            let send_tree = move || -> Result<()> {
                // Open up the namespace of the target process as a file descriptor, and enter it.
                let pidlink = fs::File::open(format!("/proc/{}/ns/mnt", pid.as_raw_nonzero()))
                    .context("Opening mount namespace link")?;
                rustix::thread::move_into_link_name_space(
                    pidlink.as_fd(),
                    Some(rustix::thread::LinkNameSpaceType::Mount),
                )
                .context("setns")?;

                // Open the target mount path as a file descriptor.
                let recursive = if recursive {
                    OpenTreeFlags::AT_RECURSIVE
                } else {
                    OpenTreeFlags::empty()
                };
                let tree = rustix::mount::open_tree(
                    rustix::fs::CWD,
                    path.as_std_path(),
                    OpenTreeFlags::OPEN_TREE_CLOEXEC | OpenTreeFlags::OPEN_TREE_CLONE | recursive,
                )
                .context("open_tree")?;

                // And send that file descriptor via fd passing over the socketpair.
                let fds = [tree.as_fd()];
                let mut buffer = [0u8; rustix::cmsg_space!(ScmRights(1))];
                let mut control = SendAncillaryBuffer::new(&mut buffer);
                let pushed = control.push(SendAncillaryMessage::ScmRights(&fds));
                anyhow::ensure!(pushed, "Failed to queue file descriptor for sending");
                let ios = std::io::IoSlice::new(DUMMY_DATA);
                rustix::net::sendmsg(sock_child, &[ios], &mut control, SendFlags::empty())
                    .context("sendmsg")?;
                Ok(())
            };
            // Then we're done. On failure nothing was sent, so the parent
            // observes end-of-file on the socket and reports an error.
            let code = match send_tree() {
                Ok(()) => 0,
                Err(e) => {
                    eprintln!("error: open_tree helper: {e:#}");
                    1
                }
            };
            std::process::exit(code)
        }
        -1 => {
            // fork failed
            let e = std::io::Error::last_os_error();
            anyhow::bail!("failed to fork: {e}");
        }
        n => {
            // We're in the parent; create a pid (checking that n > 0).
            let child = rustix::process::Pid::from_raw(n).unwrap();
            // Close our inherited copy of the child's end. While it stays open
            // the socket never reports end-of-file, so a child that died before
            // sending would leave `recvmsg` blocked forever.
            drop(sock_child);
            // Receive the mount file descriptor from the child
            let received = (|| -> Result<OwnedFd> {
                let mut cmsg_space = vec![0; rustix::cmsg_space!(ScmRights(1))];
                let mut cmsg_buffer = rustix::net::RecvAncillaryBuffer::new(&mut cmsg_space);
                let mut buf = [0u8; DUMMY_DATA.len()];
                let iov = std::io::IoSliceMut::new(buf.as_mut());
                let mut iov = [iov];
                let nread = rustix::net::recvmsg(
                    sock_parent,
                    &mut iov,
                    &mut cmsg_buffer,
                    RecvFlags::CMSG_CLOEXEC,
                )
                .context("recvmsg")?
                .bytes;
                anyhow::ensure!(
                    nread == DUMMY_DATA.len(),
                    "Helper process exited without sending a file descriptor"
                );
                anyhow::ensure!(buf == DUMMY_DATA, "Unexpected data from helper process");
                // And extract the file descriptor
                let fd = cmsg_buffer
                    .drain()
                    .filter_map(|m| match m {
                        rustix::net::RecvAncillaryMessage::ScmRights(f) => Some(f),
                        _ => None,
                    })
                    .flatten()
                    .next()
                    .ok_or_else(|| anyhow::anyhow!("Did not receive a file descriptor"))?;
                Ok(fd)
            })();
            // Reap the child on every path so a failed receive does not leave a zombie.
            let waited = rustix::process::waitpid(Some(child), WaitOptions::empty());
            let fd = received?;
            waited.context("waitpid")?;
            Ok(fd)
        }
    }
}

/// Bind mount `src`, as seen in the mount namespace of process `pid`,
/// onto `target` in our own mount namespace.
///
/// The source tree is opened via [`open_tree_from_pidns`] (recursively when
/// `recursive` is set) and then attached at `target` with `move_mount`.
pub(crate) fn bind_mount_from_pidns(
    pid: Pid,
    src: &Utf8Path,
    target: &Utf8Path,
    recursive: bool,
) -> Result<()> {
    let tree = open_tree_from_pidns(pid, src, recursive)?;
    let destination = target.as_std_path();
    // The source is the tree fd itself, hence the empty path plus F_EMPTY_PATH.
    rustix::mount::move_mount(
        tree.as_fd(),
        "",
        rustix::fs::CWD,
        destination,
        MoveMountFlags::MOVE_MOUNT_F_EMPTY_PATH,
    )
    .context("Moving mount")
}

/// Make sure `path` in our mount namespace mirrors the same path on the host.
///
/// When the path is already shared with the host (e.g. via `-v /dev:/dev`)
/// nothing is mounted; otherwise it is recursively bind mounted from the
/// mount namespace of pid 1.
pub(crate) fn ensure_mirrored_host_mount(path: impl AsRef<Utf8Path>) -> Result<()> {
    let path = path.as_ref();
    // The mount target may not exist in our filesystem yet
    // (e.g. for /var/lib/containers), so create it first.
    std::fs::create_dir_all(path)?;
    if !is_same_as_host(path)? {
        tracing::debug!("Propagating host mount: {path}");
        let host_init = Pid::from_raw(1).unwrap();
        return bind_mount_from_pidns(host_init, path, path, true);
    }
    tracing::debug!("Already mounted from host: {path}");
    Ok(())
}

0 comments on commit 8cc2909

Please sign in to comment.