From 970362ae281fb34ba22905a6781c4b2e4453a323 Mon Sep 17 00:00:00 2001 From: Ariel Miculas-Trif Date: Thu, 5 Sep 2024 23:59:55 +0300 Subject: [PATCH] Embed the PuzzleFS metadata files into the PuzzleFS manifest This simplifies the PuzzleFS layout by storing all the metadata information into a single metadata file. The previous layout had one manifest file which contained references to a list of metadata files, each stored separately. Relevant discussions: #55 Signed-off-by: Ariel Miculas-Trif --- puzzlefs-lib/src/builder.rs | 85 ++++----------- puzzlefs-lib/src/format/metadata.capnp | 2 +- puzzlefs-lib/src/format/types.rs | 141 +++++++++++++++++++------ puzzlefs-lib/src/oci.rs | 22 +--- puzzlefs-lib/src/oci/media_types.rs | 10 -- puzzlefs-lib/src/reader/puzzlefs.rs | 63 +++-------- 6 files changed, 146 insertions(+), 177 deletions(-) diff --git a/puzzlefs-lib/src/builder.rs b/puzzlefs-lib/src/builder.rs index f70fa6b..7278022 100644 --- a/puzzlefs-lib/src/builder.rs +++ b/puzzlefs-lib/src/builder.rs @@ -1,8 +1,7 @@ use crate::common::{AVG_CHUNK_SIZE, MAX_CHUNK_SIZE, MIN_CHUNK_SIZE}; use crate::compression::{Compression, Noop, Zstd}; use crate::fsverity_helpers::{ - check_fs_verity, fsverity_enable, get_fs_verity_digest, InnerHashAlgorithm, - FS_VERITY_BLOCK_SIZE_DEFAULT, + check_fs_verity, fsverity_enable, InnerHashAlgorithm, FS_VERITY_BLOCK_SIZE_DEFAULT, }; use crate::oci::Digest; use std::any::Any; @@ -77,7 +76,7 @@ struct Other { additional: Option, } -fn serialize_manifest(rootfs: Rootfs) -> Result> { +fn serialize_metadata(rootfs: Rootfs) -> Result> { let mut message = ::capnp::message::Builder::new_default(); let mut capnp_rootfs = message.init_root::>(); @@ -88,24 +87,6 @@ fn serialize_manifest(rootfs: Rootfs) -> Result> { Ok(buf) } -fn serialize_metadata(inodes: Vec) -> Result> { - let mut message = ::capnp::message::Builder::new_default(); - let capnp_inode_vector = message.init_root::>(); - let inodes_len = inodes.len().try_into()?; - - let mut capnp_inodes = capnp_inode_vector.init_inodes(inodes_len); - - for (i, inode) in inodes.iter().enumerate() { - // we already checked that the length of pfs_inodes fits inside a u32 - let mut capnp_inode = capnp_inodes.reborrow().get(i as u32); - inode.fill_capnp(&mut capnp_inode)?; - } - - let mut buf = Vec::new(); - ::capnp::serialize::write_message(&mut buf, &message)?; - Ok(buf) -} - fn process_chunks( oci: &Image, mut chunker: StreamCDC, @@ -183,7 +164,7 @@ fn build_delta( oci: &Image, mut existing: Option, verity_data: &mut VerityData, -) -> Result { +) -> Result> { let mut dirs = HashMap::::new(); let mut files = Vec::::new(); let mut others = Vec::::new(); @@ -405,13 +386,7 @@ fn build_delta( pfs_inodes.sort_by(|a, b| a.ino.cmp(&b.ino)); - let md_buf = serialize_metadata(pfs_inodes)?; - - let (desc, ..) = oci.put_blob::(md_buf.as_slice())?; - let verity_hash = get_fs_verity_digest(md_buf.as_slice())?; - verity_data.insert(desc.digest.underlying(), verity_hash); - - Ok(desc) + Ok(pfs_inodes) } pub fn build_initial_rootfs( @@ -419,16 +394,10 @@ pub fn build_initial_rootfs( oci: &Image, ) -> Result { let mut verity_data: VerityData = BTreeMap::new(); - let desc = build_delta::(rootfs, oci, None, &mut verity_data)?; - let metadatas = [BlobRef { - offset: 0, - digest: desc.digest.underlying(), - compressed: false, - }] - .to_vec(); - - let rootfs_buf = serialize_manifest(Rootfs { - metadatas, + let inodes = build_delta::(rootfs, oci, None, &mut verity_data)?; + + let rootfs_buf = serialize_metadata(Rootfs { + metadatas: vec![inodes], fs_verity_data: verity_data, manifest_version: PUZZLEFS_IMAGE_MANIFEST_VERSION, })?; @@ -448,21 +417,16 @@ pub fn add_rootfs_delta( let mut verity_data: VerityData = BTreeMap::new(); let pfs = PuzzleFS::open(oci, tag, None)?; let oci = Arc::clone(&pfs.oci); - let mut rootfs = oci.open_rootfs_blob::(tag, None)?; + let mut rootfs = Rootfs::try_from(oci.open_rootfs_blob(tag, None)?)?; - let desc = build_delta::(rootfs_path, &oci, Some(pfs), &mut verity_data)?; - let br = BlobRef { - digest: desc.digest.underlying(), - offset: 0, - compressed: false, - }; + let inodes = build_delta::(rootfs_path, &oci, Some(pfs), &mut verity_data)?; - if !rootfs.metadatas.iter().any(|&x| x == br) { - rootfs.metadatas.insert(0, br); + if !rootfs.metadatas.iter().any(|x| *x == inodes) { + rootfs.metadatas.insert(0, inodes); } rootfs.fs_verity_data.extend(verity_data); - let rootfs_buf = serialize_manifest(rootfs)?; + let rootfs_buf = serialize_metadata(rootfs)?; Ok(( oci.put_blob::(rootfs_buf.as_slice())? .0, @@ -488,9 +452,9 @@ pub fn enable_fs_verity(oci: Image, tag: &str, manifest_root_hash: &str) -> Resu let pfs = PuzzleFS::open(oci, tag, None)?; let oci = Arc::clone(&pfs.oci); - let rootfs = oci.open_rootfs_blob::(tag, None)?; + let rootfs = oci.open_rootfs_blob(tag, None)?; - for (content_addressed_file, verity_hash) in rootfs.fs_verity_data { + for (content_addressed_file, verity_hash) in rootfs.get_verity_data()? { let file_path = oci .blob_path() .join(Digest::new(&content_addressed_file).to_string()); @@ -521,8 +485,6 @@ pub fn build_test_fs(path: &Path, image: &Image) -> Result { pub mod tests { use super::*; - use std::backtrace::Backtrace; - use tempfile::tempdir; use crate::reader::WalkPuzzleFS; @@ -541,12 +503,8 @@ pub mod tests { let dir = tempdir().unwrap(); let image = Image::new(dir.path()).unwrap(); let rootfs_desc = build_test_fs(Path::new("src/builder/test/test-1"), &image).unwrap(); - let rootfs = Rootfs::open( - image - .open_compressed_blob::(&rootfs_desc.digest, None) - .unwrap(), - ) - .unwrap(); + image.add_tag("test-tag", rootfs_desc)?; + let rootfs = image.open_rootfs_blob("test-tag", None).unwrap(); // there should be a blob that matches the hash of the test data, since it all gets input // as one chunk and there's only one file @@ -563,16 +521,11 @@ pub mod tests { ) .unwrap(); - let metadata_digest = rootfs.metadatas[0].try_into().unwrap(); - let blob = image.open_metadata_blob(&metadata_digest, None).unwrap(); let mut inodes = Vec::new(); // we can at least deserialize inodes and they look sane for i in 0..2 { - inodes.push(Inode::from_capnp( - blob.find_inode((i + 1).try_into()?)? - .ok_or(WireFormatError::InvalidSerializedData(Backtrace::capture()))?, - )?); + inodes.push(rootfs.find_inode(i + 1)?); } assert_eq!(inodes[0].ino, 1); @@ -620,7 +573,7 @@ pub mod tests { let (desc, image) = add_rootfs_delta::(&delta_dir, image, tag).unwrap(); let new_tag = "test2"; image.add_tag(new_tag, desc).unwrap(); - let delta = image.open_rootfs_blob::(new_tag, None).unwrap(); + let delta = Rootfs::try_from(image.open_rootfs_blob(new_tag, None).unwrap()).unwrap(); assert_eq!(delta.metadatas.len(), 2); let image = Image::new(dir.path()).unwrap(); diff --git a/puzzlefs-lib/src/format/metadata.capnp b/puzzlefs-lib/src/format/metadata.capnp index d6c7abd..88a59a2 100644 --- a/puzzlefs-lib/src/format/metadata.capnp +++ b/puzzlefs-lib/src/format/metadata.capnp @@ -70,7 +70,7 @@ struct VerityData { } struct Rootfs { - metadatas@0: List(BlobRef); + metadatas@0: List(InodeVector); fsVerityData@1: List(VerityData); manifestVersion@2: UInt64; } diff --git a/puzzlefs-lib/src/format/types.rs b/puzzlefs-lib/src/format/types.rs index 4bea625..6f54a97 100644 --- a/puzzlefs-lib/src/format/types.rs +++ b/puzzlefs-lib/src/format/types.rs @@ -9,7 +9,6 @@ use std::ffi::OsString; use std::fmt; use std::fs; use std::io; -use std::io::Read; use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStringExt; use std::os::unix::fs::{FileTypeExt, MetadataExt, PermissionsExt}; @@ -30,25 +29,25 @@ pub type VerityData = BTreeMap<[u8; SHA256_BLOCK_SIZE], [u8; SHA256_BLOCK_SIZE]> #[derive(Debug)] pub struct Rootfs { - pub metadatas: Vec, + pub metadatas: Vec>, pub fs_verity_data: VerityData, pub manifest_version: u64, } -impl Rootfs { - pub fn open(f: R) -> Result { - let message_reader = serialize::read_message(f, ::capnp::message::ReaderOptions::new())?; - let rootfs = message_reader.get_root::>()?; - Self::from_capnp(rootfs) +impl TryFrom for Rootfs { + type Error = WireFormatError; + fn try_from(rootfs_reader: RootfsReader) -> Result { + Rootfs::from_capnp(rootfs_reader.reader.get()?) } +} +impl Rootfs { pub fn from_capnp(reader: crate::metadata_capnp::rootfs::Reader<'_>) -> Result { - let metadatas = reader.get_metadatas()?; - - let metadata_vec = metadatas + let metadata_vec = reader + .get_metadatas()? .iter() - .map(BlobRef::from_capnp) - .collect::>>()?; + .map(InodeVector::from_capnp) + .collect::>>>()?; let capnp_verities = reader.get_fs_verity_data()?; let mut fs_verity_data = VerityData::new(); @@ -78,7 +77,7 @@ impl Rootfs { for (i, metadata) in self.metadatas.iter().enumerate() { // we already checked that the length of metadatas fits inside a u32 let mut capnp_metadata = capnp_metadatas.reborrow().get(i as u32); - metadata.fill_capnp(&mut capnp_metadata); + InodeVector::fill_capnp(metadata, &mut capnp_metadata)?; } let verity_data_len = self.fs_verity_data.len().try_into()?; @@ -95,6 +94,72 @@ impl Rootfs { } } +pub struct RootfsReader { + reader: message::TypedReader< + ::capnp::serialize::BufferSegments, + crate::metadata_capnp::rootfs::Owned, + >, +} + +impl RootfsReader { + pub fn open(f: fs::File) -> Result { + // We know the loaded message is safe, so we're allowing unlimited reads. + let unlimited_reads = message::ReaderOptions { + traversal_limit_in_words: None, + nesting_limit: 64, + }; + let mmapped_region = unsafe { MmapOptions::new().map_copy_read_only(&f)? }; + let segments = serialize::BufferSegments::new(mmapped_region, unlimited_reads)?; + let reader = message::Reader::new(segments, unlimited_reads).into_typed(); + + Ok(Self { reader }) + } + + pub fn get_manifest_version(&self) -> Result { + Ok(self.reader.get()?.get_manifest_version()) + } + + pub fn get_verity_data(&self) -> Result { + let mut fs_verity_data = VerityData::new(); + + let capnp_verities = self.reader.get()?.get_fs_verity_data()?; + for capnp_verity in capnp_verities { + let digest = capnp_verity.get_digest()?.try_into()?; + let verity = capnp_verity.get_verity()?.try_into()?; + fs_verity_data.insert(digest, verity); + } + Ok(fs_verity_data) + } + + pub fn find_inode(&self, ino: u64) -> Result { + for layer in self.reader.get()?.get_metadatas()?.iter() { + let inode_vector = InodeVector { reader: layer }; + + if let Some(inode) = inode_vector.find_inode(ino)? { + let inode = Inode::from_capnp(inode)?; + if let InodeMode::Wht = inode.mode { + // TODO: seems like this should really be an Option. + return Err(WireFormatError::from_errno(Errno::ENOENT)); + } + return Ok(inode); + } + } + + Err(WireFormatError::from_errno(Errno::ENOENT)) + } + + pub fn max_inode(&self) -> Result { + let mut max: Ino = 1; + for layer in self.reader.get()?.get_metadatas()?.iter() { + let inode_vector = InodeVector { reader: layer }; + if let Some(ino) = inode_vector.max_ino()? { + max = std::cmp::max(ino, max) + } + } + Ok(max) + } +} + // TODO: should this be an ociv1 digest and include size and media type? #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub struct BlobRef { @@ -699,32 +764,16 @@ impl Xattr { } } -pub struct MetadataBlob { - reader: message::TypedReader< - ::capnp::serialize::BufferSegments, - crate::metadata_capnp::inode_vector::Owned, - >, +pub struct InodeVector<'a> { + reader: crate::metadata_capnp::inode_vector::Reader<'a>, } -impl MetadataBlob { - pub fn new(f: fs::File) -> Result { - // We know the loaded message is safe, so we're allowing unlimited reads. - let unlimited_reads = message::ReaderOptions { - traversal_limit_in_words: None, - nesting_limit: 64, - }; - let mmapped_region = unsafe { MmapOptions::new().map_copy_read_only(&f)? }; - let segments = serialize::BufferSegments::new(mmapped_region, unlimited_reads)?; - let reader = message::Reader::new(segments, unlimited_reads).into_typed(); - - Ok(MetadataBlob { reader }) - } - +impl<'a> InodeVector<'a> { pub fn get_inode_vector( &self, ) -> ::capnp::Result<::capnp::struct_list::Reader<'_, crate::metadata_capnp::inode::Owned>> { - self.reader.get()?.get_inodes() + self.reader.get_inodes() } pub fn find_inode(&self, ino: Ino) -> Result>> { @@ -759,6 +808,32 @@ impl MetadataBlob { let last_index = inodes.len() - 1; Ok(Some(inodes.get(last_index).get_ino())) } + + pub fn from_capnp( + reader: crate::metadata_capnp::inode_vector::Reader<'a>, + ) -> Result> { + reader + .get_inodes()? + .iter() + .map(|inode| Inode::from_capnp(inode)) + .collect() + } + + fn fill_capnp( + inodes: &[Inode], + builder: &mut crate::metadata_capnp::inode_vector::Builder<'_>, + ) -> Result<()> { + let inodes_len = inodes.len().try_into()?; + let mut capnp_inodes = builder.reborrow().init_inodes(inodes_len); + + for (i, inode) in inodes.iter().enumerate() { + // we already checked that the length of pfs_inodes fits inside a u32 + let mut capnp_inode = capnp_inodes.reborrow().get(i as u32); + inode.fill_capnp(&mut capnp_inode)?; + } + + Ok(()) + } } #[derive(Debug, Clone, PartialEq, Eq)] diff --git a/puzzlefs-lib/src/oci.rs b/puzzlefs-lib/src/oci.rs index c28d60b..e03e965 100644 --- a/puzzlefs-lib/src/oci.rs +++ b/puzzlefs-lib/src/oci.rs @@ -11,7 +11,7 @@ use sha2::{Digest as Sha2Digest, Sha256}; use tempfile::NamedTempFile; use crate::compression::{Compression, Decompressor, Noop, Zstd}; -use crate::format::{MetadataBlob, Result, Rootfs, VerityData, WireFormatError, SHA256_BLOCK_SIZE}; +use crate::format::{Result, RootfsReader, VerityData, WireFormatError, SHA256_BLOCK_SIZE}; use openat::Dir; use std::io::{Error, ErrorKind}; @@ -157,15 +157,6 @@ impl Image { C::decompress(f) } - pub fn open_metadata_blob( - &self, - digest: &Digest, - verity: Option<&[u8]>, - ) -> Result { - let f = self.open_raw_blob(digest, verity)?; - MetadataBlob::new(f) - } - pub fn get_image_manifest_fd(&self, tag: &str) -> Result { let index = self.get_index()?; let desc = index @@ -175,17 +166,14 @@ impl Image { Ok(file) } - pub fn open_rootfs_blob( - &self, - tag: &str, - verity: Option<&[u8]>, - ) -> Result { + pub fn open_rootfs_blob(&self, tag: &str, verity: Option<&[u8]>) -> Result { let index = self.get_index()?; let desc = index .find_tag(tag) .ok_or_else(|| io::Error::new(io::ErrorKind::NotFound, format!("no tag {tag}")))?; - let rootfs = Rootfs::open(self.open_compressed_blob::(&desc.digest, verity)?)?; - Ok(rootfs) + + let rootfs = self.open_raw_blob(&desc.digest, verity)?; + RootfsReader::open(rootfs) } pub fn fill_from_chunk( diff --git a/puzzlefs-lib/src/oci/media_types.rs b/puzzlefs-lib/src/oci/media_types.rs index 8a8223a..c80bb5c 100644 --- a/puzzlefs-lib/src/oci/media_types.rs +++ b/puzzlefs-lib/src/oci/media_types.rs @@ -12,16 +12,6 @@ impl MediaType for Rootfs { } } -const PUZZLEFS_INODES: &str = "application/vnd.puzzlefs.image.inodes.v1"; - -pub struct Inodes {} - -impl MediaType for Inodes { - fn name() -> &'static str { - PUZZLEFS_INODES - } -} - const PUZZLEFS_CHUNK_DATA: &str = "application/vnd.puzzlefs.image.layer.puzzlefs.v1"; pub struct Chunk {} diff --git a/puzzlefs-lib/src/reader/puzzlefs.rs b/puzzlefs-lib/src/reader/puzzlefs.rs index 0714772..3618a6b 100644 --- a/puzzlefs-lib/src/reader/puzzlefs.rs +++ b/puzzlefs-lib/src/reader/puzzlefs.rs @@ -6,13 +6,12 @@ use std::os::unix::ffi::OsStrExt; use std::path::{Component, Path}; use std::sync::Arc; -use crate::compression::Noop; use crate::format::{ - DirEnt, Ino, Inode, InodeMode, MetadataBlob, Result, VerityData, WireFormatError, + DirEnt, Ino, Inode, InodeMode, Result, RootfsReader, VerityData, WireFormatError, }; -use crate::oci::{Digest, Image}; +use crate::oci::Image; -pub const PUZZLEFS_IMAGE_MANIFEST_VERSION: u64 = 2; +pub const PUZZLEFS_IMAGE_MANIFEST_VERSION: u64 = 3; pub(crate) fn file_read( oci: &Image, @@ -74,71 +73,42 @@ pub(crate) fn file_read( pub struct PuzzleFS { pub oci: Arc, - layers: Vec, + rootfs: RootfsReader, pub verity_data: Option, pub manifest_verity: Option>, } impl PuzzleFS { pub fn open(oci: Image, tag: &str, manifest_verity: Option<&[u8]>) -> Result { - let rootfs = oci.open_rootfs_blob::(tag, manifest_verity)?; + let rootfs = oci.open_rootfs_blob(tag, manifest_verity)?; - if rootfs.manifest_version != PUZZLEFS_IMAGE_MANIFEST_VERSION { + if rootfs.get_manifest_version()? != PUZZLEFS_IMAGE_MANIFEST_VERSION { return Err(WireFormatError::InvalidImageVersion( format!( "got {}, expected {}", - rootfs.manifest_version, PUZZLEFS_IMAGE_MANIFEST_VERSION + rootfs.get_manifest_version()?, + PUZZLEFS_IMAGE_MANIFEST_VERSION ), Backtrace::capture(), )); } let verity_data = if manifest_verity.is_some() { - Some(rootfs.fs_verity_data) + Some(rootfs.get_verity_data()?) } else { None }; - let layers = rootfs - .metadatas - .iter() - .map(|md| -> Result { - let digest = ::try_from(md)?; - let file_verity = if let Some(verity) = &verity_data { - Some( - &verity.get(&digest.underlying()).ok_or( - WireFormatError::InvalidFsVerityData( - format!("missing verity data {digest}"), - Backtrace::capture(), - ), - )?[..], - ) - } else { - None - }; - oci.open_metadata_blob(&digest, file_verity) - }) - .collect::>>()?; + Ok(PuzzleFS { oci: Arc::new(oci), - layers, + rootfs, verity_data, manifest_verity: manifest_verity.map(|e| e.to_vec()), }) } pub fn find_inode(&self, ino: u64) -> Result { - for layer in self.layers.iter() { - if let Some(inode) = layer.find_inode(ino)? { - let inode = Inode::from_capnp(inode)?; - if let InodeMode::Wht = inode.mode { - // TODO: seems like this should really be an Option. - return Err(WireFormatError::from_errno(Errno::ENOENT)); - } - return Ok(inode); - } - } - - Err(WireFormatError::from_errno(Errno::ENOENT)) + self.rootfs.find_inode(ino) } // lookup performs a path-based lookup in this puzzlefs @@ -174,14 +144,7 @@ impl PuzzleFS { } pub fn max_inode(&self) -> Result { - let mut max: Ino = 1; - for layer in self.layers.iter() { - if let Some(ino) = layer.max_ino()? { - max = std::cmp::max(ino, max) - } - } - - Ok(max) + self.rootfs.max_inode() } }