From 99420a36773c25a15633643c1dcc61a51e89e570 Mon Sep 17 00:00:00 2001 From: lschuetze Date: Sat, 12 Feb 2022 23:27:10 +0100 Subject: [PATCH 1/7] 2022 update --- alice-open-data/Cargo.toml | 2 +- histogram/Cargo.toml | 2 +- malice/Cargo.toml | 4 +- malice/src/event.rs | 6 +- root-io/Cargo.toml | 29 +- root-io/README.md | 19 +- root-io/benches/iter_branch.rs | 3 +- root-io/src/core/compression.rs | 51 +++ root-io/src/core/data_source.rs | 13 +- root-io/src/core/file.rs | 160 ++++---- root-io/src/core/file_item.rs | 33 +- root-io/src/core/mod.rs | 32 +- root-io/src/core/parsers.rs | 538 ++++++++++++++------------- root-io/src/core/tkey.rs | 132 ++++--- root-io/src/core/tstreamer.rs | 322 +++++++++------- root-io/src/core/tstreamerinfo.rs | 65 ++-- root-io/src/core/typeid.rs | 75 ++-- root-io/src/core/types.rs | 7 +- root-io/src/lib.rs | 14 +- root-io/src/test_data/README.md | 3 +- root-io/src/tests/basic_io.rs | 7 +- root-io/src/tree_reader/branch.rs | 191 +++++----- root-io/src/tree_reader/container.rs | 86 +++-- root-io/src/tree_reader/leafs.rs | 160 ++++---- root-io/src/tree_reader/mod.rs | 38 +- root-io/src/tree_reader/tree.rs | 233 ++++++------ root-io/tests/high_level_io.rs | 2 +- root-io/tests/read_esd.rs | 10 +- root-io/tests/read_simple.rs | 19 +- root-ls/Cargo.toml | 4 +- root-ls/src/main.rs | 3 +- 31 files changed, 1241 insertions(+), 1022 deletions(-) create mode 100644 root-io/src/core/compression.rs diff --git a/alice-open-data/Cargo.toml b/alice-open-data/Cargo.toml index 7ad0975..fa7aed0 100644 --- a/alice-open-data/Cargo.toml +++ b/alice-open-data/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "alice-open-data" -version = "0.5.0" +version = "0.5.1" authors = ["cbourjau "] description = "Tools to download and manage the publicly released ALICE open data" repository = "https://github.com/cbourjau/alice-rs" diff --git a/histogram/Cargo.toml b/histogram/Cargo.toml index 1bf860b..61e8d4f 100644 --- a/histogram/Cargo.toml +++ b/histogram/Cargo.toml @@ -1,6 
+1,6 @@ [package] name = "histogram" -version = "0.1.0" +version = "0.1.1" authors = ["cbourjau "] edition = "2018" diff --git a/malice/Cargo.toml b/malice/Cargo.toml index ba23bd6..eabcd26 100644 --- a/malice/Cargo.toml +++ b/malice/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "malice" -version = "0.3.0" +version = "0.3.1" authors = ["cbourjau "] exclude = ["benches/*"] description = "A small framwork providing sensible defaults to analyse ALICE's open data" @@ -19,7 +19,7 @@ cpp = ["alice-sys"] bitflags = "1" failure = "0.1" futures = "0.3" -nom = "^5" +nom = "7" root-io = { version="0.3", path="../root-io" } # Optional dependencies alice-sys = { version="0.1", optional = true } diff --git a/malice/src/event.rs b/malice/src/event.rs index eb69343..de7586c 100644 --- a/malice/src/event.rs +++ b/malice/src/event.rs @@ -4,10 +4,10 @@ use std::fmt::Debug; use failure::Error; use futures::prelude::*; use itertools::izip; -use nom::{combinator::map, error::ParseError, number::complete::*, sequence::tuple, IResult}; +use nom::{combinator::map, number::complete::*, sequence::tuple, IResult}; use wasm_bindgen::prelude::*; -use root_io::core::parsers::{parse_custom_mantissa, parse_tobjarray_of_tnameds}; +use root_io::core::parsers::{parse_custom_mantissa, parse_tobjarray_of_tnameds, RootError}; use root_io::stream_zip; use root_io::tree_reader::Tree; @@ -243,7 +243,7 @@ fn string_to_mask(s: &str, run_number: i32) -> TriggerMask { fn parse_pid_probabilities<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], PidProbabilities, E> where - E: ParseError<&'s [u8]> + Debug, + E: RootError<&'s [u8]>, { let (input, electron) = parse_custom_mantissa(input, 8)?; let (input, muon) = parse_custom_mantissa(input, 8)?; diff --git a/root-io/Cargo.toml b/root-io/Cargo.toml index 4d2da48..1e48c65 100644 --- a/root-io/Cargo.toml +++ b/root-io/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "root-io" -version = "0.3.0" +version = "0.3.1" authors = ["cbourjau "] exclude = ["*test_data/", "*.root"] 
description = "Reading of `.root` binary files which are commonly used in particle physics" @@ -13,31 +13,30 @@ edition = "2018" [dependencies] -alice-open-data = { version="0.5", path="../alice-open-data/" } -bitflags = "1.0.0" -failure = "0.1.5" +alice-open-data = { version = "0.5", path = "../alice-open-data/" } +bitflags = "1.3" flate2 = "^1" futures = "0.3" -lzma-rs = "0.1.1" -quote = "0.3.15" -reqwest = "0.10" +lzma-rs = "0.2" +quote = "0.3" +reqwest = "0.11" uuid = "0.8.2" lz4-compress = "0.1.1" - -[dependencies.nom] -version = "^5" +nom = "7" +nom-supreme = "0.6" +thiserror = "1" [target.'cfg(target_arch = "wasm32")'.dependencies] -wasm-bindgen-futures = "0.4.10" +wasm-bindgen-futures = "0.4" [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] criterion = "0.3" -tokio = { version = "0.2", features = ["macros"] } +tokio = { version = "1.15", features = ["macros"] } [target.'cfg(target_arch = "wasm32")'.dev-dependencies] -wasm-bindgen = "0.2.60" -wasm-bindgen-test = "0.3.10" -web-sys = {"version" = "0.3.4", "features" = [ 'console' ]} +wasm-bindgen = "0.2" +wasm-bindgen-test = "0.3" +web-sys = { "version" = "0.3", "features" = ['console'] } # Run with cargo bench --bench iter_branch -- --baseline pathbuf-in-container --color never [[bench]] diff --git a/root-io/README.md b/root-io/README.md index 7fbd95c..b4f8403 100644 --- a/root-io/README.md +++ b/root-io/README.md @@ -2,18 +2,21 @@ [![Crates.io Version](https://img.shields.io/crates/v/root-io.svg)](https://crates.io/crates/root-io) - ## Documentation + https://docs.rs/root-io -`root-io` provides basic support for reading data stored in binary `.root` files commonly used in particle physics experiments. This crates provides: +`root-io` provides basic support for reading data stored in binary `.root` files commonly used in particle physics +experiments. 
This crates provides: - - Core types and parsers to read the layout description of custom classes contained in a given file - - Tools to generate `yaml` describing the streamed objects (aka. `TStreamerInfo`) - - Tools to generate (buggy) `Rust` code as a starting point for a new parser - - Set of types and parsers needed to read so-called `TTree`s - -The majority of the exposed API serves the latter point; striving to enable an easy iteration over data stored in `TTree`s. In particular, `root-io` supports reading `TBranches` (i.e. akin to "columns" of a database) with a variable number of elements in each entry (i.e. `TBranches` of `TClonesArray`). +- Core types and parsers to read the layout description of custom classes contained in a given file +- Tools to generate `yaml` describing the streamed objects (aka. `TStreamerInfo`) +- Tools to generate (buggy) `Rust` code as a starting point for a new parser +- Set of types and parsers needed to read so-called `TTree`s + +The majority of the exposed API serves the latter point; striving to enable an easy iteration over data stored +in `TTree`s. In particular, `root-io` supports reading `TBranches` (i.e. akin to "columns" of a database) with a +variable number of elements in each entry (i.e. `TBranches` of `TClonesArray`). The `root-ls` crate utilizes this crate to in a CLI to inspect a given root file and to deploy the code-gen tools. 
diff --git a/root-io/benches/iter_branch.rs b/root-io/benches/iter_branch.rs index db37238..55edc39 100644 --- a/root-io/benches/iter_branch.rs +++ b/root-io/benches/iter_branch.rs @@ -3,10 +3,9 @@ extern crate criterion; extern crate nom; extern crate root_io; -use nom::number::complete::{be_f32, be_i32, be_u32}; - use criterion::{black_box, criterion_group, criterion_main, Criterion}; use futures::StreamExt; +use nom::number::complete::{be_f32, be_i32, be_u32}; use tokio::runtime::Runtime; use root_io::RootFile; diff --git a/root-io/src/core/compression.rs b/root-io/src/core/compression.rs new file mode 100644 index 0000000..b9fba0f --- /dev/null +++ b/root-io/src/core/compression.rs @@ -0,0 +1,51 @@ +use flate2::bufread::ZlibDecoder; +use lzma_rs::xz_decompress; +use thiserror::Error; + +use DecompressionError::*; +use std::*; +use std::io::Read; + +#[derive(Error, Debug)] +pub enum DecompressionError { + #[error("Header too short")] + InsufficientData, + #[error("Compression algorithm '{0}' not supported")] + AlgorithmNotImplemented(String), + #[error("Failed to decompress LZMA section")] + LzmaFailure(#[from] lzma_rs::error::Error), + #[error("Failed to decompress LZ4 section")] + Lz4Failure, + #[error("Failed to decompress ZLib section")] + ZLibFailure(#[from] std::io::Error), +} + +pub(crate) fn decompress(input: &[u8]) -> Result, DecompressionError> { + if input.len() < 9 { return Err(InsufficientData); } + + // There is something in bytes 2..=8, but we haven't identified it yet + let magic = &input[..2]; + let compressed = &input[9..]; + + let mut ret = vec![]; + + match magic { + b"ZL" => { + let mut decoder = ZlibDecoder::new(compressed); + decoder.read_to_end(&mut ret)?; + Ok(ret) + } + b"XZ" => { + let mut reader = std::io::BufReader::new(compressed); + xz_decompress(&mut reader, &mut ret)?; + Ok(ret) + } + b"L4" => { + // TODO checksum verification? 
+ // skip leading u64 + lz4_compress::decompress(&compressed[8..]).map_err(|_| Lz4Failure) + } + other => Err(AlgorithmNotImplemented(String::from_utf8(other.to_vec()) + .unwrap_or(format!("Bad magic {other:?}")))) + } +} \ No newline at end of file diff --git a/root-io/src/core/data_source.rs b/root-io/src/core/data_source.rs index e671056..e096f36 100644 --- a/root-io/src/core/data_source.rs +++ b/root-io/src/core/data_source.rs @@ -1,14 +1,15 @@ +use reqwest::{ + Client, + header::{RANGE, USER_AGENT}, Url, +}; + use std::fs::File; use std::io::{Read, Seek, SeekFrom}; #[cfg(not(target_arch = "wasm32"))] use std::path::Path; use std::path::PathBuf; -use failure::Error; -use reqwest::{ - header::{RANGE, USER_AGENT}, - Client, Url, -}; +use crate::tree_reader::ReadError; /// The source from where the Root file is read. Construct it using /// `.into()` on a `Url` or `Path`. The latter is not availible for @@ -33,7 +34,7 @@ impl Source { thing.into() } - pub async fn fetch(&self, start: u64, len: u64) -> Result, Error> { + pub async fn fetch(&self, start: u64, len: u64) -> Result, ReadError> { match &self.0 { SourceInner::Local(path) => { let mut f = File::open(&path)?; diff --git a/root-io/src/core/file.rs b/root-io/src/core/file.rs index 69b9fff..90388a5 100644 --- a/root-io/src/core/file.rs +++ b/root-io/src/core/file.rs @@ -1,22 +1,19 @@ -use std::fmt; - -use failure::Error; -use nom::{ - self, - bytes::complete::tag, - error::VerboseError, - number::complete::{be_i16, be_i32, be_u128, be_u16, be_u32, be_u64, be_u8}, - IResult, -}; - +use nom::{self, + IResult, + number::complete::{be_i16, be_i32, be_u128, be_u16, be_u32, be_u64, be_u8}, Parser}; +use nom::sequence::tuple; +use nom_supreme::{ParserExt, tag::complete::tag}; use uuid::Uuid; +use std::fmt; + use crate::{ code_gen::rust::{ToNamedRustParser, ToRustStruct}, - core::tstreamer::streamers, core::*, + core::tstreamer::streamers, MAP_OFFSET, }; +use crate::tree_reader::{ReadError, WriteError}; /// Size of 
serialized `FileHeader` in bytes const FILE_HEADER_SIZE: u64 = 75; @@ -63,8 +60,8 @@ pub struct Directory { } /// Parse opening part of a root file -fn file_header(i: &[u8]) -> IResult<&[u8], FileHeader> { - fn version_dep_int(i: &[u8], is_64_bit: bool) -> IResult<&[u8], u64> { +fn file_header<'s, E: RootError<&'s [u8]>>(i: &'s [u8]) -> IResult<&'s [u8], FileHeader, E> { + fn version_dep_int<'s, E: RootError<&'s [u8]>>(i: &'s [u8], is_64_bit: bool) -> IResult<&'s [u8], u64, E> { if is_64_bit { be_u64(i) } else { @@ -111,65 +108,68 @@ fn file_header(i: &[u8]) -> IResult<&[u8], FileHeader> { } /// Parse a file-pointer based on the version of the file -fn versioned_pointer(input: &[u8], version: i16) -> nom::IResult<&[u8], u64> { - if version > 1000 { - be_u64(input) - } else { - map!(input, be_i32, |val| val as u64) +fn versioned_pointer<'s, E>(version: i16) -> impl nom::Parser<&'s [u8], u64, E> + where + E: RootError<&'s [u8]> +{ + move |i| { + if version > 1000 { + be_u64.parse(i) + } else { + be_u32.map(|v| v as u64).parse(i) + } } } -#[rustfmt::skip::macros(do_parse)] -named!( - #[doc="Directory within a root file; exists on ever file"], - directory<&[u8], Directory>, - do_parse!( - version: be_i16 >> - c_time: be_u32 >> - m_time: be_u32 >> - n_bytes_keys: be_i32 >> - n_bytes_name: be_i32 >> - seek_dir: call!(versioned_pointer, version) >> - seek_parent: call!(versioned_pointer, version) >> - seek_keys: call!(versioned_pointer, version) >> - ({ - Directory {version, c_time, m_time, n_bytes_keys, - n_bytes_name, seek_dir, seek_parent, seek_keys, - }}) - ) -); +/// Directory within a root file; exists on ever file +fn directory<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], Directory, E> + where + E: RootError<&'s [u8]> +{ + tuple(( + be_i16.context("directory version"), + be_u32.context("directory time created"), + be_u32.context("directory time modified"), + be_i32.context("directory key byte count"), + be_i32.context("directory name byte count") + 
)).flat_map(make_fn(|(version, c_time, m_time, n_bytes_keys, n_bytes_name)| { + tuple(( + versioned_pointer(version).context("seek dir"), + versioned_pointer(version).context("seek parent"), + versioned_pointer(version).context("seek keys") + )).map(move |(seek_dir, seek_parent, seek_keys)| + Directory { + version, + c_time, + m_time, + n_bytes_keys, + n_bytes_name, + seek_dir, + seek_parent, + seek_keys, + }) + })).context("ROOT directory").parse(input) +} + impl RootFile { /// Open a new ROOT file either from a `Url`, or from a `Path` /// (not available on `wasm32`). - pub async fn new>(source: S) -> Result { + pub async fn new>(source: S) -> Result { let source = source.into(); - let hdr = source.fetch(0, FILE_HEADER_SIZE).await.and_then(|buf| { - file_header(&buf) - .map_err(|_| format_err!("Failed to parse file header")) - .map(|(_i, o)| o) - })?; + let hdr_buf = source.fetch(0, FILE_HEADER_SIZE).await?; + let hdr = wrap_parser(file_header.all_consuming())(&hdr_buf)?; + //let hdr = _hdr?; + // Jump to the TDirectory and parse it - let dir = source - .fetch(hdr.seek_dir, TDIRECTORY_MAX_SIZE) - .await - .and_then(|buf| { - directory(&buf) - .map_err(|_| format_err!("Failed to parse TDirectory")) - .map(|(_i, o)| o) - })?; - let tkey_of_keys = source - .fetch(dir.seek_keys, dir.n_bytes_keys as u64) - .await - .and_then(|buf| { - tkey(&buf) - .map_err(|_| format_err!("Failed to parse TKeys")) - .map(|(_i, o)| o) - })?; - let keys = match tkey_headers(&tkey_of_keys.obj) { - Ok((_, hdrs)) => Ok(hdrs), - _ => Err(format_err!("Expected TKeyHeaders")), - }?; + let dir_buf = source.fetch(hdr.seek_dir, TDIRECTORY_MAX_SIZE).await?; + let dir = wrap_parser(directory)(&dir_buf)?; + + let tkey_buf = source.fetch(dir.seek_keys, dir.n_bytes_keys as u64).await?; + let tkey_of_keys = wrap_parser(tkey.all_consuming())(&tkey_buf)?; + + let keys = wrap_parser(tkey_headers.all_consuming())(&tkey_of_keys.obj)?; + let items = keys .iter() .map(|k_hdr| FileItem::new(k_hdr, 
source.clone())) @@ -178,13 +178,13 @@ impl RootFile { Ok(RootFile { source, hdr, items }) } - pub async fn get_streamer_context(&self) -> Result { + pub async fn get_streamer_context(&self) -> Result { let seek_info_len = (self.hdr.nbytes_info + 4) as u64; - let info_key = self + let info_key_buf = self .source .fetch(self.hdr.seek_info, seek_info_len) - .await - .map(|buf| tkey(&buf).unwrap().1)?; + .await?; + let info_key = wrap_parser(tkey.all_consuming())(&info_key_buf)?; let key_len = info_key.hdr.key_len; Ok(Context { @@ -199,17 +199,16 @@ impl RootFile { &self.items } - /// Translate the streamer info of this file to a YAML file - pub async fn streamer_infos(&self) -> Result, Error> { + /// Get the stream info of this file + pub async fn streamer_infos(&self) -> Result, ReadError> { let ctx = self.get_streamer_context().await?; let buf = ctx.s.as_slice(); - let (_, streamer_vec) = streamers::>(buf, &ctx) - .map_err(|_| format_err!("Failed to parse TStreamers"))?; - Ok(streamer_vec) + let res = wrap_parser(streamers(&ctx))(buf)?; + Ok(res) } /// Translate the streamer info of this file to a YAML file - pub async fn streamer_info_as_yaml(&self, s: &mut W) -> Result<(), Error> { + pub async fn streamer_info_as_yaml(&self, s: &mut W) -> Result<(), WriteError> { for el in &self.streamer_infos().await? 
{ writeln!(s, "{:#}", el.to_yaml())?; } @@ -217,7 +216,7 @@ impl RootFile { } /// Generate Rust code from the streamer info of this file - pub async fn streamer_info_as_rust(&self, s: &mut W) -> Result<(), Error> { + pub async fn streamer_info_as_rust(&self, s: &mut W) -> Result<(), WriteError> { // Add necessary imports at the top of the file writeln!( s, @@ -229,7 +228,6 @@ impl RootFile { use parsers::utils::*; use core_types::*; } - .to_string() )?; let streamer_infos = self.streamer_infos().await?; // generate structs @@ -257,16 +255,17 @@ impl RootFile { #[cfg(all(test, not(target_arch = "wasm32")))] mod test { - use super::*; - use std::path::Path; - use nom::error::VerboseError; use nom::multi::length_value; use reqwest::Url; use tokio; + use std::path::Path; + + use super::*; + const SIMPLE_FILE_REMOTE: &str = - "https://github.com/cbourjau/alice-rs/blob/master/root-io/src/test_data/simple.root?raw=true"; + "https://github.com/cbourjau/alice-rs/blob/master/root-io/src/test_data/simple.root?raw=true"; #[tokio::test] async fn read_cms_file_remote() { @@ -402,6 +401,7 @@ mod test { #[tokio::test] async fn streamerinfo_test_local() { + let local = Path::new("./src/test_data/simple.root").into(); streamerinfo_test(local).await; } diff --git a/root-io/src/core/file_item.rs b/root-io/src/core/file_item.rs index 2b97b42..8459b74 100644 --- a/root-io/src/core/file_item.rs +++ b/root-io/src/core/file_item.rs @@ -1,8 +1,9 @@ -use failure::Error; -use nom::{error::VerboseError, multi::length_value}; +use nom::multi::length_value; +use nom_supreme::ParserExt; -use crate::core::{checked_byte_count, decompress, Context, Source, TKeyHeader}; -use crate::tree_reader::{ttree, Tree}; +use crate::core::{checked_byte_count, Context, Source, TKeyHeader, wrap_parser}; +use crate::core::compression::decompress; +use crate::tree_reader::{ReadError, Tree, ttree}; /// Describes a single item within this file (e.g. 
a `Tree`) #[derive(Debug)] @@ -31,22 +32,21 @@ impl FileItem { ) } - async fn get_buffer(&self) -> Result, Error> { + async fn get_buffer(&self) -> Result, ReadError> { let start = self.tkey_hdr.seek_key + self.tkey_hdr.key_len as u64; let len = self.tkey_hdr.total_size - self.tkey_hdr.key_len as u32; let comp_buf = self.source.fetch(start, len as u64).await?; let buf = if self.tkey_hdr.total_size < self.tkey_hdr.uncomp_len { // Decompress the read buffer; buf is Vec - let (_, buf) = decompress(comp_buf.as_slice()).unwrap(); - buf + decompress(comp_buf.as_slice())? } else { comp_buf }; Ok(buf) } - pub(crate) async fn get_context<'s>(&self) -> Result { + pub(crate) async fn get_context<'s>(&self) -> Result { let buffer = self.get_buffer().await?; let k_map_offset = 2; Ok(Context { @@ -57,26 +57,23 @@ impl FileItem { } /// Parse this `FileItem` as a `Tree` - pub async fn as_tree(&self) -> Result { + pub async fn as_tree(&self) -> Result { let ctx = self.get_context().await?; let buf = ctx.s.as_slice(); - let res = length_value(checked_byte_count, |i| ttree::>(i, &ctx))(buf); - match res { - Ok((_, obj)) => Ok(obj), - Err(nom::Err::Error(e)) | Err(nom::Err::Failure(e)) => { - Err(format_err!("Supplied parser failed! {:?}", e.errors)) - } - _ => panic!(), - } + let res = wrap_parser( + length_value(checked_byte_count, ttree(&ctx)).all_consuming() + )(buf)?; + Ok(res) } } #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { - use crate::core::RootFile; use std::path::Path; + use crate::core::RootFile; + #[tokio::test] async fn open_simple() { let path = Path::new("./src/test_data/simple.root"); diff --git a/root-io/src/core/mod.rs b/root-io/src/core/mod.rs index e2a03c7..e06822d 100644 --- a/root-io/src/core/mod.rs +++ b/root-io/src/core/mod.rs @@ -1,6 +1,19 @@ //! This module contains the core structs and parsers needed to read //! the self-description of a root file. These parsers can be used to //! 
build new parsers using the [root-ls](https://github.com/cbourjau/alice-rs) cli. +use thiserror::Error; + +pub(crate) use self::compression::*; +pub use self::compression::DecompressionError; +pub use self::data_source::Source; +pub use self::file::RootFile; +pub use self::file_item::FileItem; +pub(crate) use self::parsers::*; +pub(crate) use self::tkey::*; +pub(crate) use self::tstreamer::{tstreamer, TStreamer}; +pub(crate) use self::tstreamerinfo::{tstreamerinfo, TStreamerInfo}; +pub(crate) use self::typeid::*; +pub(crate) use self::types::*; mod data_source; mod file; @@ -11,14 +24,15 @@ mod tstreamer; mod tstreamerinfo; mod typeid; pub mod types; +mod compression; -pub(crate) use self::parsers::*; -pub(crate) use self::tkey::*; -pub(crate) use self::tstreamer::{tstreamer, TStreamer}; -pub(crate) use self::tstreamerinfo::{tstreamerinfo, TStreamerInfo}; -pub(crate) use self::typeid::*; -pub(crate) use self::types::*; +#[derive(Error, Debug)] +pub enum SemanticError { + #[error("Unsupported version {1} for {0:?} ({2})")] + VersionNotSupported(Component, u32, &'static str) +} -pub use self::data_source::Source; -pub use self::file::RootFile; -pub use self::file_item::FileItem; +#[derive(Debug)] +pub enum Component { + TStreamerElement +} \ No newline at end of file diff --git a/root-io/src/core/parsers.rs b/root-io/src/core/parsers.rs index abbc494..50572fa 100644 --- a/root-io/src/core/parsers.rs +++ b/root-io/src/core/parsers.rs @@ -1,29 +1,65 @@ -use std::convert::TryFrom; -use std::fmt::Debug; -use std::io::Read; +use nom::{self, bytes::complete::{take, take_until}, combinator::{map_res, rest}, error::ParseError, IResult, multi::{count, length_data, length_value}, number::complete::{be_i32, be_u16, be_u32, be_u8}, Parser, sequence::{pair, tuple}}; +use nom::branch::alt; +use nom::combinator::{cond, eof}; +use nom::error::{ContextError, FromExternalError, VerboseError}; +use nom::multi::length_count; +use nom_supreme::parser_ext::ParserExt; +use 
nom_supreme::tag::TagError; + /// Parsers of the ROOT core types. Note that objects in ROOT files /// are often, but not always, preceeded by their size. The parsers in /// this module do therefore not included this leading size /// information. Usually, the user will want to do that with something /// along the lines of `length_value!(checked_byte_count, tobject)` /// themselves. +use std::convert::{TryFrom, TryInto}; +use std::fmt::Debug; +use std::result::Result::Ok; use std::str; -use failure::Error; -use flate2::bufread::ZlibDecoder; -use lzma_rs::xz_decompress; -use nom::{ - self, - bytes::complete::{take, take_until}, - combinator::{map, map_res, rest, verify}, - error::ParseError, - multi::{count, length_data, length_value}, - number::complete::{be_i32, be_u16, be_u32, be_u64, be_u8}, - sequence::{pair, tuple}, - IResult, -}; - use crate::core::*; +use crate::core::compression::DecompressionError; + +pub trait RootError: ParseError ++ ContextError ++ TagError ++ FromExternalError ++ FromExternalError ++ FromExternalError ++ FromExternalError ++ Debug {} + +impl ++ ContextError ++ TagError ++ FromExternalError ++ FromExternalError ++ FromExternalError ++ FromExternalError ++ Debug> RootError for T {} + + +/// Corerce a closure to a Fn, for use with map_res et al. 
+pub(crate) fn make_fn U>(f: F) -> F { + f +} + + +pub(crate) fn wrap_parser<'s, O>(parser: impl Parser<&'s [u8], O, VerboseError<&'s [u8]>>) -> impl FnMut(&'s [u8]) -> Result>> +{ + let mut parser = parser.complete(); + + move |input| match parser.parse(input) { + Ok((_, parsed)) => Ok(parsed), + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + let errors = err.errors.iter().map(|(i, kind)| (i.to_vec(), kind.clone())).collect(); + Err(VerboseError { errors }) + }, + Err(nom::Err::Incomplete(..)) => { + unreachable!("Complete combinator should make this impossible") + } + } +} #[allow(clippy::trivially_copy_pass_by_ref)] fn is_byte_count(v: &u32) -> bool { @@ -32,203 +68,176 @@ fn is_byte_count(v: &u32) -> bool { /// Return the size in bytes of the following object in the input. The /// count is the remainder of this object minus the size of the count. -pub fn checked_byte_count<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], u32, E> -where - E: ParseError<&'s [u8]> + Debug, +pub fn checked_byte_count<'s, E>(input: &'s [u8]) -> IResult<&[u8], u32, E> + where + E: RootError<&'s [u8]>, { - verify( - map(verify(be_u32, is_byte_count), |v| { - v & !Flags::BYTE_COUNT_MASK.bits() - }), - |v| *v != 0, - )(input) + be_u32.verify(is_byte_count).context("does not match bytecount mask") + .map(|v| v & Flags::BYTE_COUNT_MASK.bits()) + .verify(|&v| v != 0).context("byte count must not be 0") + .verify(|&v| v < 0x4000_0000).context("highest bit in byte count must be unset") + .parse(input) +} + +/// Read ROOT's string length prefix, which is usually a u8, but can be extended +/// to a u32 (for a total of 5 bytes) if the first byte is 255 +fn string_length_prefix<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], u32, E> + where + E: RootError<&'s [u8]>, +{ + alt(( + be_u8.verify(|&v| v == 255).precedes(be_u32).cut().context("extended string length prefix"), + be_u8.verify(|&v| v != 255).map(|v| v as u32).context("short string length prefix") + ))(input) } /// Read ROOT's 
version of short and long strings (preceeded by u8). Does not read null terminated! -#[rustfmt::skip::macros(do_parse)] -pub fn string<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], String, E> -where - E: ParseError<&'s [u8]> + Debug, +pub fn string<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], &'s str, E> + where + E: RootError<&'s [u8]>, { - do_parse!(input, - len: switch!(be_u8, - 255 => call!(be_u32) | - a => value!(u32::from(a))) >> - s: map!( - map_res!(take!(len), str::from_utf8), - |s| s.to_string()) >> - (s) - ) + length_data(string_length_prefix) + .map_res(str::from_utf8) + .context("length-prefixed string") + .parse(input) } /// Parser for the most basic of ROOT types pub fn tobject<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], TObject, E> -where - E: ParseError<&'s [u8]> + Debug, + where + E: RootError<&'s [u8]>, { - do_parse!( - input, - ver: be_u16 >> // version_consume_extra_virtual >> - id: be_u32 >> - bits: map!(be_u32, |v| { - // TObjects read from disc must have the ON_HEAP flag - TObjectFlags::from_bits_truncate(v| TObjectFlags::IS_ON_HEAP.bits())} - ) >> - _ref: cond!(bits.intersects(TObjectFlags::IS_REFERENCED), be_u16) >> - ({TObject { - ver, id, bits - }}) - ) + tuple(( + be_u16.context("tobject version"), + be_u32.context("tobject id"), + be_u32.context("tobject flags") + .map(|v| TObjectFlags::from_bits_truncate(v | TObjectFlags::IS_ON_HEAP.bits())) + )) + .flat_map(make_fn(|(ver, id, bits): (u16, u32, TObjectFlags)| + cond(bits.intersects(TObjectFlags::IS_REFERENCED), be_u16.context("tobject reference")) + .map(move |_ref| TObject { ver, id, bits, _ref }))) + .parse(input) } /// Parse a `TList` -pub fn tlist<'s, E>(i: &'s [u8], ctx: &'s Context) -> IResult<&'s [u8], Vec>, E> -where - E: ParseError<&'s [u8]> + Debug, +pub fn tlist<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Vec>, E> + where + E: RootError<&'s [u8]>, { - let (i, _ver) = verify(be_u16, |&v| v == 5)(i)?; - let (i, (_tobj, _name, len)) = tuple((tobject, 
string, be_i32))(i)?; - let (i, objs) = count( - |i| { - let wrapped_raw = |i| raw(i, ctx); - let (i, obj) = length_value(checked_byte_count, wrapped_raw)(i)?; - let (i, _) = length_data(be_u8)(i)?; - Ok((i, obj)) + RootContextParser { + context, + parser: |ctx, inpt| { + let (i, _ver) = be_u16.context("tlist version") + .verify(|&v| v == 5).context("tlist version must be 5").parse(inpt)?; + let (i, (_tobj, _name, num_obj)) = tuple((tobject, string, be_i32))(i)?; + let (i, objs) = count( + |i| { + let (i, obj) = length_value(checked_byte_count, raw(ctx)) + .context("entry in tlist") + .parse(i)?; + // TODO verify remaining entry data + let (i, _) = length_data(be_u8)(i)?; + Ok((i, obj)) + }, + num_obj as usize, + )(i)?; + + // TODO: Verify rest + let (i, _) = rest(i)?; + Ok((i, objs)) }, - len as usize, - )(i)?; - let (i, _) = rest(i)?; - Ok((i, objs)) + }//.context("tlist") } /// Parser for `TNamed` objects #[rustfmt::skip::macros(do_parse)] pub fn tnamed<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], TNamed, E> -where - E: ParseError<&'s [u8]> + Debug, + where + E: RootError<&'s [u8]>, { - do_parse!(input, - _ver: be_u16 >> - _tobj: tobject >> - name: string >> - title: string >> - ({TNamed{name, title}}) - ) + tuple(( + be_u16.context("version"), + tobject.context("object header"), + string.context("name"), + string.context("title") + )).context("named tobject") + .map(|(_, _, name, title)| TNamed { name: name.to_string(), title: title.to_string() }) + .parse(input) } /// Parse a `TObjArray` -#[rustfmt::skip::macros(do_parse)] -pub fn tobjarray<'s, E, F, O>( - parser: F, - i: &'s [u8], - context: &'s Context, -) -> nom::IResult<&'s [u8], Vec, E> -where - F: Fn(&Raw<'s>, &'s Context) -> nom::IResult<&'s [u8], O, E>, - E: ParseError<&'s [u8]> + Debug, +pub fn tobjarray<'s, E, F, P, O>(parser: F, context: &'s Context) -> impl Fn(&'s [u8]) -> IResult<&'s [u8], Vec, E> + where + F: Fn(&'s Context) -> P, + P: Parser, O, E>, + E: RootError<&'s [u8]>, { - let (i, 
_ver) = be_u16(i)?; - let (i, _tobj) = tobject(i)?; - let (i, _name) = c_string(i)?; - let (i, size) = be_i32(i)?; - let (i, _low) = be_i32(i)?; - let (i, objs) = count( - map_res( - |i| raw(i, context), - |r| { - let res = parser(&r, context).map(|(_i, res)| res); - if res.is_err() { - res.as_ref().unwrap(); - } - res - }, - ), - size as usize, - )(i)?; - Ok((i, objs)) + make_fn(move |i| { + let (i, _ver) = be_u16(i)?; + let (i, _tobj) = tobject(i)?; + let (i, _name) = c_string(i)?; + let (i, size) = be_i32(i)?; + let (i, _low) = be_i32(i)?; + let (i, objs): (&'s [u8], Vec) = count( + raw(context).and_then(parser(context)), + size as usize, + )(i)?; + Ok((i, objs)) + }) } /// Parse a `TObjArray` which does not have references pointing outside of the input buffer -#[rustfmt::skip::macros(do_parse)] -pub fn tobjarray_no_context<'s, E>( - input: &'s [u8], -) -> nom::IResult<&'s [u8], Vec<(ClassInfo, &'s [u8])>, E> -where - E: ParseError<&'s [u8]> + Debug, +pub fn tobjarray_no_context<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], Vec<(ClassInfo, &'s [u8])>, E> + where + E: RootError<&'s [u8]>, { - do_parse!(input, - _ver: be_u16 >> - _tobj: tobject >> - _name: c_string >> - _size: be_i32 >> - _low: be_i32 >> - objs: map!(count!(raw_no_context, _size as usize), - |v| v.into_iter().map(|(ci, s)| (ci, s)).collect()) >> - (objs) - ) + tuple(( + be_u16.context("TObjArray header version"), + tobject.context("TObjArray object header"), + c_string.context("TObjArray name"), + be_i32.context("TObjArray num objects"), + be_i32.context("TObjArray unknown") + )).flat_map(make_fn(|(_, _, _, num_objects, _): (u16, TObject, &str, i32, i32)| + count(raw_no_context, num_objects.try_into().unwrap()))) + .context("TObjArray") + .parse(input) + // |v| v.into_iter().map(|(ci, s)| (ci, s)).collect()) >> } -#[rustfmt::skip::macros(do_parse)] -named!( - #[doc="Parser for `TObjString`"], - pub tobjstring<&[u8], String>, - do_parse!(_ver: be_u16 >> - _tobj: tobject >> - name: string >> - 
_eof: eof!() >> - ({name}) - ) -); - -/// Parse a so-called `TArray`. Note that ROOT's `TArray`s are actually not fixed size. -/// Example usage for TArrayI: `tarray(nom::complete::be_i32, input_slice)` -pub fn tarray<'s, E, F, O>(parser: F, i: &'s [u8]) -> nom::IResult<&'s [u8], Vec, E> -where - F: Fn(&'s [u8]) -> nom::IResult<&'s [u8], O, E>, - E: ParseError<&'s [u8]> + Debug, +pub fn tobjstring<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], &'s str, E> + where + E: RootError<&'s [u8]>, { - let (i, counts) = be_i32(i)?; - count(parser, counts as usize)(i) + tuple(( + be_u16.context("tobjstring version"), + tobject.context("tobjstring object"), + string.context("tobjstring name"), + eof.context("tobjstring must consume input") + )).map(|(_, _, name, _)| name) + .parse(input) } -fn decode_reader(bytes: &[u8], magic: &str) -> Result, Error> { - let mut ret = vec![]; - match magic { - "ZL" => { - let mut decoder = ZlibDecoder::new(bytes); - decoder.read_to_end(&mut ret)?; - } - "XZ" => { - let mut reader = std::io::BufReader::new(bytes); - xz_decompress(&mut reader, &mut ret).unwrap(); - } - "L4" => { - use lz4_compress::decompress; - let (bytes, _checksum) = be_u64::<()>(bytes).unwrap(); - ret = decompress(bytes).unwrap(); - } - m => return std::dbg!(Err(format_err!("Unsupported compression format `{}`", m))), - }; - Ok(ret) -} - -/// Decompress the given buffer. Figures out the compression algorithm from the preceeding \"magic\" bytes -pub fn decompress(input: &[u8]) -> nom::IResult<&[u8], Vec> { - map_res( - tuple((|i| take_str!(i, 2usize), take(7usize), rest)), - |(magic, _header, comp_buf)| decode_reader(comp_buf, magic), - )(input) +/// Parse a so-called `TArray`. Note that ROOT's `TArray`s are actually not fixed size. 
+/// Example usage for TArrayI: `tarray(nom::complete::be_i32).parse(input_slice)` +pub fn tarray<'s, E, F, O>(parser: F) -> impl nom::Parser<&'s [u8], Vec, E> + where + F: Parser<&'s [u8], O, E>, + E: RootError<&'s [u8]>, +{ + length_count(be_u32, parser).context("tarray") } /// Parse a null terminated string pub fn c_string<'s, E>(i: &'s [u8]) -> nom::IResult<&[u8], &str, E> -where - E: ParseError<&'s [u8]> + Debug, + where + E: RootError<&'s [u8]>, { - let (i, s) = map_res(take_until(b"\x00".as_ref()), str::from_utf8)(i)?; - // consume the null tag - let (i, _) = take(1usize)(i)?; - Ok((i, s)) + map_res( + take_until(b"\x00".as_ref()).terminated(be_u8.verify(|&v| v == 0)), + str::from_utf8, + ) + .context("c string") + .parse(i) } /// Figure out the class we are looking at. The data might not be @@ -236,31 +245,43 @@ where /// buffer.This is modeled after ROOT's `TBufferFile::ReadObjectAny` and /// `TBufferFile::ReadClass` pub fn classinfo<'s, E>(i: &'s [u8]) -> nom::IResult<&[u8], ClassInfo, E> -where - E: ParseError<&'s [u8]> + Debug, + where + E: RootError<&'s [u8]>, { - let (i, tag) = { - let (i, bcnt) = be_u32(i)?; - if !is_byte_count(&bcnt) || bcnt == Flags::NEW_CLASSTAG.bits() { - (i, bcnt) - } else { - be_u32(i)? 
- } - }; - let (i, cl) = match tag as u32 { - 0xFFFF_FFFF => { - let (i, cl) = map!(i, c_string, ClassInfo::New)?; - (i, cl) + let (i, tag) = alt(( + be_u32 + .verify(|&v| !is_byte_count(&v) || v == Flags::NEW_CLASSTAG.bits()) + .context("class info: new classtag or not a valid bytecount"), + be_u32 + .verify(|&v| is_byte_count(&v) && v != Flags::NEW_CLASSTAG.bits()) + .context("class info: class tag preceded by byte count") + .precedes(be_u32) + )).parse(i)?; + + + match tag as u32 { + 0xFFFF_FFFF => { // new classtag mask + c_string.map(ClassInfo::New).parse(i) } tag => { if Flags::from_bits_truncate(tag).contains(Flags::CLASS_MASK) { - (i, ClassInfo::Exists(tag & !Flags::CLASS_MASK.bits())) + Ok((i, ClassInfo::Exists(tag & !Flags::CLASS_MASK.bits()))) } else { - (i, ClassInfo::References(tag)) + Ok((i, ClassInfo::References(tag))) } } - }; - Ok((i, cl)) + } +} + +struct RootContextParser<'s, I, O, E> { + context: &'s Context, + parser: fn(&'s Context, I) -> IResult, +} + +impl<'s, I, O, E> Parser for RootContextParser<'s, I, O, E> { + fn parse(&mut self, input: I) -> IResult { + self.parser(self.context, input) + } } /// Figure out the class we are looking at. This parser immediately @@ -268,79 +289,79 @@ where /// this buffer and the associated data. This function needs a /// `Context`, though, which may not be available. If so, have a look /// at the `classinfo` parser. -pub fn class_name_and_buffer<'s, E>( - i: &'s [u8], - context: &'s Context, -) -> nom::IResult<&'s [u8], (&'s str, &'s [u8]), E> -where - E: ParseError<&'s [u8]> + std::fmt::Debug, +pub fn class_name_and_buffer<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], (&'s str, &'s [u8]), E> + where + E: RootError<&'s [u8]>, { - let ctx_offset = u32::try_from(context.offset) - .expect("Encountered pointer larger than 32 bits. 
Please file a bug."); - let (i, ci) = classinfo(i)?; - Ok(match ci { - ClassInfo::New(s) => { - let (i, buf) = length_value(checked_byte_count, rest)(i)?; - (i, (s, buf)) - } - ClassInfo::Exists(tag) => { - let name = { - let abs_offset = tag & !Flags::CLASS_MASK.bits(); - let s = &context.s[((abs_offset - ctx_offset) as usize)..]; - let (_, (name, _)) = class_name_and_buffer(s, context)?; - name - }; - let (i, buf) = length_value(checked_byte_count, rest)(i)?; - (i, (name, buf)) - } - ClassInfo::References(tag) => { - let (name, buf) = { - let abs_offset = tag; - // Sometimes, the reference points to `0`; so we return an empty slice - if abs_offset == 0 { - ("", &context.s[..0]) - } else { - let s = &context.s[((abs_offset - ctx_offset) as usize)..]; - let (_, (name, buf)) = class_name_and_buffer(s, context)?; - (name, buf) + RootContextParser { + context, + parser: (|ctx, i| { + let ctx_offset = u32::try_from(ctx.offset) + .expect("Encountered pointer larger than 32 bits. Please file a bug."); + let (i, ci) = classinfo(i)?; + Ok(match ci { + ClassInfo::New(s) => { + let (i, buf) = length_value(checked_byte_count, rest)(i)?; + (i, (s, buf)) } - }; - (i, (name, buf)) - } - }) + ClassInfo::Exists(tag) => { + let name = { + let abs_offset = tag & !Flags::CLASS_MASK.bits(); + // TODO handle insufficient buffer length, abs_offset < ctx_offset + let s = &ctx.s[((abs_offset - ctx_offset) as usize)..]; + let (_, (name, _)) = class_name_and_buffer(ctx).parse(s)?; + name + }; + let (i, buf) = length_value(checked_byte_count, rest)(i)?; + (i, (name, buf)) + } + ClassInfo::References(tag) => { + let (name, buf) = { + let abs_offset = tag; + // Sometimes, the reference points to `0`; so we return an empty slice + if abs_offset == 0 { + ("", &ctx.s[..0]) + } else { + // TODO as above + let s = &ctx.s[((abs_offset - ctx_offset) as usize)..]; + let (_, (name, buf)) = class_name_and_buffer(ctx).parse(s)?; + (name, buf) + } + }; + (i, (name, buf)) + } + }) + }), + } } -/// Parse a 
`Raw` chunk from the given input buffer. This is usefull when one does not know the exact type at the time of parsing -#[rustfmt::skip::macros(do_parse)] -pub fn raw<'s, E>(input: &'s [u8], context: &'s Context) -> nom::IResult<&'s [u8], Raw<'s>, E> -where - E: ParseError<&'s [u8]> + Debug, +/// Parse a `Raw` chunk from the given input buffer. This is useful when one does not +/// know the exact type at the time of parsing +pub fn raw<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Raw<'s>, E> + where + E: RootError<&'s [u8]>, { - do_parse!(input, - string_and_obj: call!(class_name_and_buffer, context) >> - // obj: length_value!(checked_byte_count, call!(nom::rest)) >> - ({let (classinfo, obj) = string_and_obj; - Raw{classinfo, obj}}) - ) + class_name_and_buffer(context) + .map(|(classinfo, obj)| Raw { classinfo, obj }) } /// Same as `raw` but doesn't require a `Context` as input. Panics if /// a `Context` is required to parse the underlying buffer (i.e., the /// given buffer contains a reference to some other part of the file. pub fn raw_no_context<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], (ClassInfo, &[u8]), E> -where - E: ParseError<&'s [u8]> + Debug, + where + E: RootError<&'s [u8]>, { use super::ClassInfo::*; - let (input, ci) = classinfo(input)?; - let obj = match ci { + let (input, ci) = classinfo.parse(input)?; + + match ci { // point to beginning of slice - References(0) => value!(input, &input[..0]), - New(_) | Exists(_) => length_value!(input, checked_byte_count, call!(rest)), + References(0) => take(0usize).map(|o| (ci, o)).parse(input), + New(_) | Exists(_) => length_data(checked_byte_count).map(|o| (ci, o)).parse(input), // If its a reference to any other thing but 0 it needs a context _ => panic!("Object needs context!"), - }; - obj.map(|(i, o)| (i, (ci, o))) + } } /// ESD trigger classes are strings describing a particular @@ -349,8 +370,8 @@ where /// as an `TObjArray` of `TNamed` objects for each event. 
This breaks /// it down to a simple vector pub fn parse_tobjarray_of_tnameds<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], Vec, E> -where - E: ParseError<&'s [u8]> + Debug, + where + E: RootError<&'s [u8]>, { // each element of the tobjarray has a Vec let (input, vals) = length_value(checked_byte_count, tobjarray_no_context)(input)?; @@ -372,24 +393,25 @@ where /// generated YAML code (for ALICE ESD files at least). This function /// reconstructs a float from the exponent and mantissa pub fn parse_custom_mantissa<'s, E>(input: &'s [u8], nbits: usize) -> nom::IResult<&[u8], f32, E> -where - E: ParseError<&'s [u8]> + Debug, + where + E: RootError<&'s [u8]>, { // TODO: Use ByteOrder crate to be cross-platform? - pair(be_u8, be_u16)(input).map(|(input, (exp, man))| { + pair(be_u8, be_u16).map(|(exp, man)| { let mut s = u32::from(exp); // Move the exponent into the last 23 bits s <<= 23; s |= (u32::from(man) & ((1 << (nbits + 1)) - 1)) << (23 - nbits); - (input, f32::from_bits(s)) - }) + f32::from_bits(s) + }).parse(input) } #[cfg(test)] mod classinfo_test { - use super::classinfo; use nom::error::VerboseError; + use super::classinfo; + /// There is an issue where the following is parsed differently on /// nightly ( rustc 1.25.0-nightly (79a521bb9 2018-01-15)), than /// on stable, if verbose-errors are enabled for nom in the diff --git a/root-io/src/core/tkey.rs b/root-io/src/core/tkey.rs index cdd64ba..49d4c5a 100644 --- a/root-io/src/core/tkey.rs +++ b/root-io/src/core/tkey.rs @@ -1,8 +1,12 @@ -use nom::combinator::map; -use nom::number::complete::*; use nom::*; +use nom::bytes::complete::take; +use nom::multi::length_count; +use nom::number::complete::{be_i16, be_u16, be_u32, be_u64}; +use nom::sequence::tuple; +use nom_supreme::ParserExt; use crate::core::*; +use crate::core::compression::{decompress, DecompressionError}; #[derive(Debug, Clone)] #[allow(dead_code)] @@ -28,66 +32,80 @@ pub struct TKey { pub(crate) obj: Vec, } -#[rustfmt::skip::macros(do_parse)] 
-named!( - #[doc=r#"Header of a TKey Usually, TKeys are followed up by their -content, but there is one "index" in ever root file where only the -TKey headers are stored for faster later `Seek`ing"#], - pub tkey_header<&[u8], TKeyHeader>, - do_parse!(total_size: be_u32 >> - version: be_u16 >> - uncomp_len: be_u32 >> - datime: be_u32 >> - key_len: be_i16 >> - cycle: be_i16 >> - seek_key: call!(seek_point, version) >> - seek_pdir: call!(seek_point, version) >> - class_name: string >> - obj_name: string >> - obj_title: string >> - (TKeyHeader { - total_size, - version, - uncomp_len, - datime, - key_len, - cycle, - seek_key, - seek_pdir, - class_name, - obj_name, - obj_title, - }) - ) -); +/// Header of a TKey +/// Usually, TKeys are followed up by their content, but there is one "index" in every +/// root file where only the TKey headers are stored for faster later `Seek`ing +pub fn tkey_header<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], TKeyHeader, E> + where + E: RootError<&'s [u8]> +{ + tuple(( + be_u32.context("total size"), + be_u16.context("version"), + be_u32.context("uncompressed length"), + be_u32.context("datime"), + be_i16.context("key length"), + be_i16.context("cycle") + )).flat_map(make_fn(|(total_size, version, uncomp_len, datime, key_len, cycle)| tuple(( + seek_point(version).context("seek key"), + seek_point(version).context("seek pdir"), + string.context("class name"), + string.context("object name"), + string.context("object title") + )).map(move |(seek_key, seek_pdir, class_name, obj_name, obj_title)| TKeyHeader { + total_size, + version, + uncomp_len, + datime, + key_len, + cycle, + seek_key, + seek_pdir, + class_name: class_name.to_string(), + obj_name: obj_name.to_string(), + obj_title: obj_title.to_string(), + }))).context("tkey header").parse(input) +} /// Parse a file-pointer based on the version of the file -fn seek_point(input: &[u8], version: u16) -> nom::IResult<&[u8], u64> { - if version > 1000 { - be_u64(input) - } else { - map(be_u32, 
u64::from)(input) +fn seek_point<'s, E>(version: u16) -> impl Parser<&'s [u8], u64, E> + where + E: RootError<&'s [u8]> +{ + move |i| { + if version > 1000 { + be_u64.parse(i) + } else { + be_u32.map(|v| v as u64).parse(i) + } } } -#[rustfmt::skip::macros(do_parse)] -named!( - #[doc="Parse a full TKey including its payload"], - pub tkey<&[u8], TKey>, - do_parse!(hdr: tkey_header >> - obj: take!(hdr.total_size - hdr.key_len as u32) >> - ({ - let obj = if hdr.uncomp_len as usize > obj.len() { - decompress(obj).unwrap().1 - } else { - obj.to_vec() - }; - TKey {hdr, obj} - }) - ) -); +/// Parse a full TKey including its payload +pub fn tkey<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], TKey, E> + where + E: RootError<&'s [u8]> +{ + let (i, hdr) = tkey_header.parse(input)?; + let buflen = hdr.total_size - hdr.key_len as u32; + let uncomp_len = hdr.uncomp_len; + + let mut opthdr = Some(hdr); + + take(buflen).map_res::<_, _, DecompressionError>(|buf: &[u8]| { + let obj = if uncomp_len as usize > buf.len() { + decompress(buf)? 
+ } else { + buf.to_vec() + }; + Ok(TKey { hdr: opthdr.take().unwrap(), obj }) + }).context("tkey").parse(i) +} /// Special thing for the keylist in the file header -pub(crate) fn tkey_headers(input: &[u8]) -> IResult<&[u8], Vec> { - length_count!(input, be_i32, tkey_header) +pub(crate) fn tkey_headers<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], Vec, E> + where + E: RootError<&'s [u8]> +{ + length_count(be_u32, tkey_header).parse(input) } diff --git a/root-io/src/core/tstreamer.rs b/root-io/src/core/tstreamer.rs index 9925cd5..ad3de3b 100644 --- a/root-io/src/core/tstreamer.rs +++ b/root-io/src/core/tstreamer.rs @@ -1,20 +1,19 @@ -use std::fmt::Debug; - -use nom::{ - error::{ParseError, VerboseError}, - multi::length_data, - multi::length_value, - number::complete::*, - IResult, -}; - +use nom::{error::VerboseError, IResult, multi::length_value, Parser}; +use nom::combinator::eof; +use nom::multi::{count, length_count}; +use nom::number::complete::{be_i32, be_u16, be_u32}; +use nom::sequence::{pair, tuple}; +use nom_supreme::ParserExt; use quote::*; +use std::fmt::Debug; + use crate::{ code_gen::rust::{ToRustParser, ToRustType}, code_gen::utils::{alias_or_lifetime, sanitize, type_is_core}, core::*, }; +use crate::core::SemanticError::VersionNotSupported; /// Union of all posible `TStreamers`. See figure at /// @@ -68,14 +67,14 @@ pub(crate) enum TStreamer { /// type of STL vector vtype: StlTypeID, /// STL contained type - ctype: TypeID, + ctype: TypeId, }, StlString { el: TStreamerElement, /// type of STL vector vtype: StlTypeID, /// STL contained type - ctype: TypeID, + ctype: TypeId, }, } @@ -85,7 +84,7 @@ pub(crate) enum TStreamer { pub(crate) struct TStreamerElement { ver: u16, name: TNamed, - el_type: TypeID, + el_type: TypeId, size: i32, array_len: i32, array_dim: i32, @@ -99,141 +98,192 @@ pub(crate) struct TStreamerElement { /// Parse a `TStreamer` from a `Raw` buffer. 
This is usually the case /// after reading the `TList` of `TStreamerInfo`s from a ROOT file -#[rustfmt::skip::macros(do_parse)] -pub(crate) fn tstreamer<'s, E>(raw: &Raw<'s>) -> IResult<&'s [u8], TStreamer, E> -where - E: ParseError<&'s [u8]> + Debug, +pub(crate) fn tstreamer<'s, E>(raw: Raw<'s>) -> IResult, TStreamer, E> + where + E: RootError<&'s [u8]>, { - let wrapped_tstreamerelem = |i| length_value!(i, checked_byte_count, tstreamerelement); - match raw.classinfo { - "TStreamerBase" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - version_base: be_i32 >> - (TStreamer::Base {el, version_base})), - "TStreamerBasicType" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - (TStreamer::BasicType {el})), - "TStreamerBasicPointer" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - cvers: be_i32 >> - cname: string >> - ccls: string >> - (TStreamer::BasicPointer {el, cvers, cname, ccls})), - "TStreamerLoop" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - cvers: be_i32 >> - cname: string >> - ccls: string >> - (TStreamer::Loop {el, cvers, cname, ccls})), - "TStreamerObject" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - (TStreamer::Object {el})), - "TStreamerObjectPointer" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - (TStreamer::ObjectPointer {el})), - "TStreamerObjectAny" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - (TStreamer::ObjectAny {el})), - "TStreamerObjectAnyPointer" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - (TStreamer::ObjectAnyPointer {el})), - "TStreamerString" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - (TStreamer::String {el})), - "TStreamerSTL" => do_parse!(raw.obj, - _ver: be_u16 >> - el: wrapped_tstreamerelem >> - vtype: map!(be_i32, StlTypeID::new) >> - ctype: map_res!(be_i32, TypeID::new) >> - (TStreamer::Stl {el, vtype, 
ctype})), + let wrapped_tstreamerelem = length_value(checked_byte_count, tstreamerelement); + + let result = match raw.classinfo { + "TStreamerBase" => tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.context("version base") + )).map(|(_ver, el, version_base)| TStreamer::Base { el, version_base }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerBasicType" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::BasicType { el }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerBasicPointer" => tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.context("cvers"), + string.context("cname"), + string.context("ccls") + )).map(|(_ver, el, cvers, cname, ccls)| TStreamer::BasicPointer { el, cvers, cname: cname.to_string(), ccls: ccls.to_string() }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerLoop" => tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.context("cvers"), + string.context("cname"), + string.context("ccls") + )).map(|(_ver, el, cvers, cname, ccls)| TStreamer::Loop { el, cvers, cname: cname.to_string(), ccls: ccls.to_string() }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerObject" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::Object { el }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerObjectPointer" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::ObjectPointer { el }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerObjectAny" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::ObjectAny { el }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerObjectAnyPointer" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| 
TStreamer::ObjectAnyPointer { el }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerString" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::String { el }) + .terminated(eof).context("tstreamer").parse(raw.obj), + + "TStreamerSTL" => tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.map(StlTypeID::new).context("vtype"), + be_i32.map_res(TypeId::new).context("ctype") + )).map(|(_ver, el, vtype, ctype)| TStreamer::Stl { el, vtype, ctype }) + .terminated(eof).context("tstreamer").parse(raw.obj), + "TStreamerSTLstring" => { // Two version bcs `stlstring` derives from `stl` - let (i, _ver) = be_u16(raw.obj)?; - let (_, stl_buffer) = length_data(checked_byte_count)(i)?; - let (stl_buffer, _ver) = be_u16(stl_buffer)?; - let (stl_buffer, el) = wrapped_tstreamerelem(stl_buffer)?; - let (stl_buffer, vtype) = map!(stl_buffer, be_i32, StlTypeID::new)?; - let (_stl_buffer, ctype) = map_res!(stl_buffer, be_i32, TypeID::new)?; - Ok((i, TStreamer::StlString { el, vtype, ctype })) + be_u16.precedes(length_value(checked_byte_count, tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.map(StlTypeID::new).context("vtype"), + be_i32.map_res(TypeId::new).context("ctype") + )))).map(|(_ver, el, vtype, ctype)| TStreamer::StlString { el, vtype, ctype }) + .terminated(eof).context("tstreamer").parse(raw.obj) } ci => unimplemented!("Unknown TStreamer {}", ci), - } + }; + + result.map(|(i, res)| (Raw { classinfo: raw.classinfo, obj: i }, res)) } -/// Return all `TSreamerInfo` for the data in this file -pub fn streamers<'s, E>(i: &'s [u8], ctx: &'s Context) -> IResult<&'s [u8], Vec, E> +/* +struct Streamers<'s> { + ctx: &'s Context +} + +impl<'s> Parser<&'s [u8], Vec, E> for Streamers<'s> where - E: ParseError<&'s [u8]> + Debug, + E: ParseError<'s [u8]> + Debug + + */ +/// Return all `TSreamerInfo` for the data in this file +pub fn streamers<'s, E>(ctx: &'s Context) -> impl Parser<&'s 
[u8], Vec, E> + 's + where + E: RootError<&'s [u8]>, { - // Dunno why we are 4 bytes off with the size of the streamer info... - - // This TList in the payload has a bytecount in front... - let (i, tlist_objs) = length_value(checked_byte_count, |i| tlist(i, ctx))(i)?; - // Mainly this is a TList of `TStreamerInfo`s, but there might - // be some "rules" in the end - let streamers = tlist_objs - .iter() - .filter_map(|raw| match raw.classinfo { - "TStreamerInfo" => Some(raw.obj), - _ => None, - }) - .map(|i| tstreamerinfo::>(i, ctx).unwrap().1) - .collect(); - // Parse the "rules", if any, from the same tlist - let _rules: Vec<_> = tlist_objs - .iter() - .filter_map(|raw| match raw.classinfo { - "TList" => Some(raw.obj), - _ => None, - }) - .map(|i| { - let tl = tlist::>(i, ctx).unwrap().1; - // Each `Rule` is a TList of `TObjString`s - tl.iter() - .map(|el| tobjstring(el.obj).unwrap().1) - .collect::>() - }) - .collect(); - Ok((i, streamers)) + + move |i| { + // Dunno why we are 4 bytes off with the size of the streamer info... + + // This TList in the payload has a bytecount in front... 
+ let (i, tlist_objs) = length_value(checked_byte_count, tlist(ctx))(i)?; + // Mainly this is a TList of `TStreamerInfo`s, but there might + // be some "rules" in the end + let streamers = tlist_objs + .iter() + .filter_map(|raw| match raw.classinfo { + "TStreamerInfo" => Some(raw.obj), + _ => None, + }) + .map(|buf| tstreamerinfo::>(ctx).parse(buf).unwrap().1) + .collect(); + // Parse the "rules", if any, from the same tlist + let _rules: Vec<_> = tlist_objs + .iter() + .filter_map(|raw| match raw.classinfo { + "TList" => Some(raw.obj), + _ => None, + }) + .map(|buf| { + let tl = tlist::>(ctx).parse(buf).unwrap().1; + // Each `Rule` is a TList of `TObjString`s + tl.iter() + .map(|el| tobjstring::<'s, E>(el.obj).unwrap().1) + .collect::>() + }) + .collect(); + + for raw in tlist_objs { + match raw.classinfo { + "TStreamerInfo" | "TList" => {} + other => println!("Got unexpected class in streamers list: {other}") + } + }; + + Ok((i, streamers)) + } } -#[rustfmt::skip::macros(do_parse)] /// The element which is wrapped in a TStreamer fn tstreamerelement<'s, E>(i: &'s [u8]) -> IResult<&'s [u8], TStreamerElement, E> -where - E: ParseError<&'s [u8]> + Debug, + where + E: RootError<&'s [u8]>, { - do_parse!(i, - ver: be_u16 >> - name: length_value!(checked_byte_count, tnamed) >> - el_type: map_res!(be_i32, TypeID::new) >> - size: be_i32 >> - array_len: be_i32 >> - array_dim: be_i32 >> - max_idx: switch!(value!(ver), - 1 => length_count!(be_i32, be_u32) | - _ => count!(be_u32, 5)) >> - type_name: string >> - _eof: eof!() >> - ({ - if ver <= 3 { - unimplemented!(); - } - TStreamerElement { - ver, name, el_type, size, array_len, - array_dim, max_idx, type_name - } - }) - ) + + tuple(( + be_u16.context("version"), + length_value(checked_byte_count, tnamed).context("name"), + be_i32.map_res(TypeId::new).context("element type"), + be_i32.context("size"), + be_i32.context("array length"), + be_i32.context("array dimensions") + )).flat_map(make_fn(|(ver, name, el_type, size, 
array_len, array_dim): (u16, TNamed, TypeId, i32, i32, i32)| { + let mut optname = Some(name); + tuple(( + move |i| if ver == 1 { length_count(be_u32, be_u32)(i) } else { count(be_u32, 5)(i) }, + string, + eof + )).map_res(move |(max_idx, type_name, _)| { + if ver <= 3 { + Err(VersionNotSupported(Component::TStreamerElement, ver as u32, "must be >= 4")) + } else { + Ok(TStreamerElement { + ver, + name: optname.take().unwrap(), + el_type, + size, + array_len, + array_dim, + max_idx, + type_name: type_name.to_string(), + }) + } + }) + })).context("tstreamer element").parse(i) } impl TStreamer { @@ -282,7 +332,7 @@ impl ToTokens for TStreamer { impl ToRustType for TStreamer { fn type_name(&self) -> Tokens { - use self::TypeID::*; + use self::TypeId::*; let name = Ident::new(alias_or_lifetime(&self.elem().name.name.to_owned())); match self { TStreamer::Base { ref el, .. } => { @@ -362,7 +412,7 @@ impl ToRustType for TStreamer { impl ToRustParser for TStreamer { fn to_inline_parser(&self) -> Tokens { - use self::TypeID::*; + use self::TypeId::*; let name = match self { // `Base` types, i.e. 
types from which the current object inherited; // In that case the name is actually the type diff --git a/root-io/src/core/tstreamerinfo.rs b/root-io/src/core/tstreamerinfo.rs index e844517..4a43245 100644 --- a/root-io/src/core/tstreamerinfo.rs +++ b/root-io/src/core/tstreamerinfo.rs @@ -1,9 +1,10 @@ -use std::fmt::Debug; - -use nom::{error::ParseError, multi::length_value, number::complete::*, IResult}; - +use nom::{multi::length_value, number::complete::*, Parser}; +use nom::combinator::eof; +use nom::error::dbg_dmp; use quote::*; +use std::fmt::Debug; + use crate::{ code_gen::rust::{ToNamedRustParser, ToRustParser, ToRustStruct, ToRustType}, code_gen::utils::type_is_core, @@ -21,37 +22,35 @@ pub struct TStreamerInfo { } /// Parse one `TStreamerInfo` object (as found in the `TList`) -#[rustfmt::skip::macros(do_parse)] -pub(crate) fn tstreamerinfo<'s, E>( - i: &'s [u8], - context: &'s Context, -) -> IResult<&'s [u8], TStreamerInfo, E> -where - E: ParseError<&'s [u8]> + Debug, +pub(crate) fn tstreamerinfo<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], TStreamerInfo, E> + where + E: RootError<&'s [u8]>, { - let parse_members = |i| tobjarray(|raw_obj, _context| tstreamer(raw_obj), i, context); + move |i| { + let parse_members = tobjarray(|_| tstreamer, context); - let (i, tstreamerinfo_ver) = be_u16(i)?; - let (i, named) = length_value!(i, checked_byte_count, tnamed)?; - let (i, checksum) = be_u32(i)?; - let (i, new_class_version) = be_u32(i)?; - let (i, _size_tobjarray_with_class_info) = checked_byte_count(i)?; - let (i, _class_info_objarray) = classinfo(i)?; - let (i, data_members) = length_value( - nom::dbg_dmp(checked_byte_count, "byte count"), - nom::dbg_dmp(parse_members, "parse_members"), - )(i)?; - let (i, _eof) = eof!(i,)?; - Ok(( - i, - TStreamerInfo { - tstreamerinfo_ver, - named, - checksum, - new_class_version, - data_members, - }, - )) + let (i, tstreamerinfo_ver) = be_u16(i)?; + let (i, named) = length_value(checked_byte_count, tnamed)(i)?; + 
let (i, checksum) = be_u32(i)?; + let (i, new_class_version) = be_u32(i)?; + let (i, _size_tobjarray_with_class_info) = checked_byte_count(i)?; + let (i, _class_info_objarray) = classinfo(i)?; + let (i, data_members) = length_value( + dbg_dmp(checked_byte_count, "byte count"), + dbg_dmp(parse_members, "parse_members"), + )(i)?; + let (i, _eof) = eof(i)?; + Ok(( + i, + TStreamerInfo { + tstreamerinfo_ver, + named, + checksum, + new_class_version, + data_members, + }, + )) + } } impl ToRustParser for TStreamerInfo { diff --git a/root-io/src/core/typeid.rs b/root-io/src/core/typeid.rs index 5c1cf19..8ed99ae 100644 --- a/root-io/src/core/typeid.rs +++ b/root-io/src/core/typeid.rs @@ -1,15 +1,34 @@ -use failure::Error; use quote::*; +use thiserror::Error; use crate::code_gen::rust::{ToRustParser, ToRustType}; +#[derive(Error, Debug)] +pub enum TypeIdError { + #[error("Invalid Type Id {0}")] + InvalidTypeId(i32), + #[error("Invalid Primitive Id")] + PrimitiveError(#[from] InvalidPrimitive), +} + +#[derive(Error, Debug)] +pub enum InvalidPrimitive { + #[error("Invalid Primitive Id {0}")] + Id(i32), + #[error("Invalid Primitive Offset {0}")] + Offset(i32), + #[error("Invalid Primitive Array {0}")] + Array(i32), +} + + /// Integer ID describing a streamed type in a `TStreamer` -#[derive(Debug, Clone)] -pub(crate) enum TypeID { +#[derive(Debug, Clone, Copy)] +pub(crate) enum TypeId { InvalidOrCounter(i32), - Primitive(PrimitiveID), - Offset(PrimitiveID), - Array(PrimitiveID), + Primitive(PrimitiveId), + Offset(PrimitiveId), + Array(PrimitiveId), Base, Object, Named, @@ -19,18 +38,18 @@ pub(crate) enum TypeID { ObjectP, String, AnyP, - STL, - STLString, + Stl, + StlString, Streamer, Unknown(i32), } /// ID describing a primitive type. This is a subset (1..19) of the integers used for `TypeID`. 
-#[derive(Debug, Clone)] -pub(crate) struct PrimitiveID(pub(crate) i32); +#[derive(Debug, Clone, Copy)] +pub(crate) struct PrimitiveId(pub(crate) i32); /// Type of a streamed STL container -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] pub(crate) enum StlTypeID { Vector, Bitset, @@ -39,25 +58,25 @@ pub(crate) enum StlTypeID { MultiMap, } -impl PrimitiveID { - pub(crate) fn new(id: i32) -> Result { +impl PrimitiveId { + pub(crate) fn new(id: i32) -> Option { match id { - 1..=19 => Ok(PrimitiveID(id)), - id => Err(format_err!("Invalid base type id {}", id)), + 1..=19 => Some(PrimitiveId(id)), + _id => None, } } } -impl TypeID { - pub(crate) fn new(id: i32) -> Result { - use self::TypeID::*; +impl TypeId { + pub(crate) fn new(id: i32) -> Result { + use self::TypeId::*; Ok(match id { // -1 may mean that this branch / leaf has no data, or that it has an elements-per-entry array... -1 => InvalidOrCounter(id), 0 => Base, - id @ 1..=19 => Primitive(PrimitiveID::new(id)?), - id @ 21..=39 => Offset(PrimitiveID::new(id - 20)?), - id @ 41..=59 => Array(PrimitiveID::new(id - 40)?), + id @ 1..=19 => Primitive(PrimitiveId::new(id).ok_or(InvalidPrimitive::Id(id))?), + id @ 21..=39 => Offset(PrimitiveId::new(id - 20).ok_or(InvalidPrimitive::Offset(id))?), + id @ 41..=59 => Array(PrimitiveId::new(id - 40).ok_or(InvalidPrimitive::Array(id))?), 61 => Object, 62 => Any, 63 => Objectp, @@ -66,8 +85,8 @@ impl TypeID { 66 => TObject, 67 => Named, 69 => AnyP, - 300 => STL, - 365 => STLString, + 300 => Stl, + 365 => StlString, 500 => Streamer, id => Unknown(id), }) @@ -87,9 +106,9 @@ impl StlTypeID { } } -impl ToRustType for TypeID { +impl ToRustType for TypeId { fn type_name(&self) -> Tokens { - use self::TypeID::*; + use self::TypeId::*; let t = match self { Primitive(ref id) | Offset(ref id) => id.type_name().to_string(), Array(ref id) => format!("Vec<{}>", id.type_name()), @@ -97,7 +116,7 @@ impl ToRustType for TypeID { ObjectP => "Option>".to_string(), String => 
"String".to_string(), // Some funky things which we just treat as byte strings for now - Object | STL | STLString | Streamer | Unknown(82) => "Vec".to_string(), + Object | Stl | StlString | Streamer | Unknown(82) => "Vec".to_string(), Any => "Vec".to_string(), AnyP => "Vec".to_string(), InvalidOrCounter(-1) => "u32".to_string(), @@ -108,7 +127,7 @@ impl ToRustType for TypeID { } } -impl ToRustParser for PrimitiveID { +impl ToRustParser for PrimitiveId { fn to_inline_parser(&self) -> Tokens { let t = match self.0 { 1 => "be_i8", //"kChar", @@ -142,7 +161,7 @@ impl ToRustParser for PrimitiveID { } } -impl ToRustType for PrimitiveID { +impl ToRustType for PrimitiveId { fn type_name(&self) -> Tokens { let t = match self.0 { 1 => "i8", //"kChar", diff --git a/root-io/src/core/types.rs b/root-io/src/core/types.rs index 089537c..0d7444e 100644 --- a/root-io/src/core/types.rs +++ b/root-io/src/core/types.rs @@ -1,9 +1,9 @@ +use nom::HexDisplay; + use std::fmt; use crate::core::Source; -use nom::HexDisplay; - /// Absolute point in file to seek data pub(crate) type SeekPointer = u64; @@ -25,7 +25,7 @@ bitflags! { /// Used in `TStreamerInfo` /// Describes if the following entry is a new class or if it was /// already described. -#[derive(Debug)] +#[derive(Clone,Copy,Debug)] pub enum ClassInfo<'s> { /// Class name of the new class New(&'s str), @@ -42,6 +42,7 @@ pub struct TObject { pub(crate) ver: u16, pub(crate) id: u32, pub(crate) bits: TObjectFlags, + pub(crate) _ref: Option, } /// A ROOT object with a name and a title diff --git a/root-io/src/lib.rs b/root-io/src/lib.rs index cbfb7a0..3abd308 100644 --- a/root-io/src/lib.rs +++ b/root-io/src/lib.rs @@ -13,21 +13,21 @@ //! The API surface is deliberately small to make the processing of said //! files as easy as possible. If you are looking for a particular //! parser chances have it that it exists but it is not marked as `pub`. 
+#![feature(negative_impls)] + +#![allow(clippy::cognitive_complexity)] #![recursion_limit = "256"] +extern crate alice_open_data; #[macro_use] extern crate bitflags; -#[macro_use] +extern crate flate2; +extern crate lzma_rs; extern crate nom; #[macro_use] extern crate quote; -#[macro_use] -extern crate failure; -extern crate flate2; -extern crate lzma_rs; extern crate reqwest; -extern crate alice_open_data; +pub use crate::core::{FileItem, RootFile, Source}; // pub mod core_types; mod code_gen; @@ -39,7 +39,5 @@ pub mod tree_reader; // Contains the stream_zip macro pub mod utils; -pub use crate::core::{FileItem, RootFile, Source}; - /// Offset when using Context; should be in `Context`, maybe? const MAP_OFFSET: u64 = 2; diff --git a/root-io/src/test_data/README.md b/root-io/src/test_data/README.md index 76f0f2c..c74475a 100644 --- a/root-io/src/test_data/README.md +++ b/root-io/src/test_data/README.md @@ -1 +1,2 @@ -This directory contains binary ROOT files for testing purposes. They where primarily taken from the [uproot project]() and from the [ALICE public data](http://opendata.cern.ch). +This directory contains binary ROOT files for testing purposes. They were primarily taken from the [uproot project]() +and from the [ALICE public data](http://opendata.cern.ch). 
diff --git a/root-io/src/tests/basic_io.rs b/root-io/src/tests/basic_io.rs index 7b7eb2c..43a81b3 100644 --- a/root-io/src/tests/basic_io.rs +++ b/root-io/src/tests/basic_io.rs @@ -1,9 +1,12 @@ #![cfg(not(target_arch = "wasm32"))] -use crate::core::*; +use nom::Parser; use nom::error::VerboseError; + use std::path::PathBuf; +use crate::core::*; + #[test] fn list_of_rules() { let s = &[ @@ -35,7 +38,7 @@ fn list_of_rules() { }; use nom::HexDisplay; println!("{}", s.to_hex(16)); - let (_, (_name, obj)) = class_name_and_buffer::>(s, &context).unwrap(); + let (_, (_name, obj)) = class_name_and_buffer::>(&context).parse(s).unwrap(); println!("{}", obj.to_hex(16)); let (obj, _ci) = classinfo::>(obj).unwrap(); println!("{:?}", _ci); diff --git a/root-io/src/tree_reader/branch.rs b/root-io/src/tree_reader/branch.rs index 214e304..3580725 100644 --- a/root-io/src/tree_reader/branch.rs +++ b/root-io/src/tree_reader/branch.rs @@ -1,12 +1,9 @@ -use std::fmt::Debug; - use futures::prelude::*; -use nom::{ - error::{ParseError, VerboseError}, - multi::{count, length_value}, - number::complete::*, - IResult, -}; +use nom::{error::VerboseError, IResult, multi::{count, length_data, length_value}, number::complete::*, Parser}; +use nom::combinator::eof; +use nom_supreme::ParserExt; + +use std::fmt::Debug; use crate::{ code_gen::rust::ToRustType, core::parsers::*, core::types::*, @@ -117,9 +114,9 @@ impl TBranch { /// }).await; ///# } /// ``` - pub fn as_fixed_size_iterator(&self, p: P) -> impl Stream - where - P: Fn(&[u8]) -> IResult<&[u8], T, VerboseError<&[u8]>>, + pub fn as_fixed_size_iterator(&self, p: P) -> impl Stream + where + P: Fn(&[u8]) -> IResult<&[u8], T, VerboseError<&[u8]>>, { stream::iter(self.containers().to_owned()) .then(|basket| async move { basket.raw_data().await.unwrap() }) @@ -172,93 +169,99 @@ impl TBranch { /// `TBranchElements` are a subclass of `TBranch` if the content is an Object /// We ignore the extra information for now and just parse the 
TBranch"Header" in either case -pub(crate) fn tbranch_hdr<'s, E>(raw: &Raw<'s>, ctxt: &'s Context) -> IResult<&'s [u8], TBranch, E> -where - E: ParseError<&'s [u8]> + Debug, +pub(crate) fn tbranch_hdr<'s, E>(ctxt: &'s Context) -> impl Parser, TBranch, E> + where + E: RootError<&'s [u8]>, { - match raw.classinfo { - "TBranchElement" | "TBranchObject" => { - let (i, _ver) = be_u16(raw.obj)?; - length_value!(i, checked_byte_count, call!(tbranch, ctxt)) - } - "TBranch" => tbranch(raw.obj, ctxt), - name => panic!("Unexpected Branch type {}", name), + move |raw: Raw<'s>| { + match raw.classinfo { + "TBranchElement" | "TBranchObject" => { + be_u16.precedes(length_value(checked_byte_count, tbranch(ctxt))) + .terminated(eof) + .parse(raw.obj) + } + "TBranch" => + tbranch(ctxt) + .terminated(eof) + .parse(raw.obj), + name => panic!("Unexpected Branch type {}", name), + }.map(|(i, res)| (Raw { classinfo: raw.classinfo, obj: i }, res)) } } -#[rustfmt::skip::macros(do_parse)] -fn tbranch<'s, E>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], TBranch, E> -where - E: ParseError<&'s [u8]> + Debug, +fn tbranch<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], TBranch, E> + where + E: RootError<&'s [u8]>, { - let (i, _ver) = verify!(i, be_u16, |v| [11, 12].contains(v))?; - let (i, tnamed) = length_value!(i, checked_byte_count, tnamed)?; - let (i, _tattfill) = length_data!(i, checked_byte_count)?; - let (i, fcompress) = be_i32(i)?; - let (i, fbasketsize) = be_i32(i)?; - let (i, fentryoffsetlen) = be_i32(i)?; - let (i, fwritebasket) = be_i32(i)?; - let (i, fentrynumber) = be_i64(i)?; - let (i, foffset) = be_i32(i)?; - let (i, fmaxbaskets) = be_i32(i)?; - let (i, fsplitlevel) = be_i32(i)?; - let (i, fentries) = be_i64(i)?; - let (i, ffirstentry) = be_i64(i)?; - let (i, ftotbytes) = be_i64(i)?; - let (i, fzipbytes) = be_i64(i)?; - let (i, fbranches) = - length_value(checked_byte_count, |i| tobjarray(tbranch_hdr, i, context))(i)?; - let (i, fleaves) = 
length_value(checked_byte_count, |i| { - tobjarray(TLeaf::parse_from_raw, i, context) - })(i)?; - let (i, fbaskets) = length_value(checked_byte_count, |i| { - tobjarray(|r, _context| Ok((&[], r.obj)), i, context) - })(i)?; - let (i, fbasketbytes) = preceded!(i, be_u8, count!(be_i32, fmaxbaskets as usize))?; - let (i, fbasketentry) = preceded!(i, be_u8, count!(be_i64, fmaxbaskets as usize))?; - let (i, fbasketseek) = preceded!(i, be_u8, count!(be_u64, fmaxbaskets as usize))?; - let (i, ffilename) = string(i)?; + move |inpt| { + let (i, _ver) = be_u16.verify(|v| [11, 12].contains(v)).parse(inpt)?; + let (i, tnamed) = length_value(checked_byte_count, tnamed).parse(i)?; + let (i, _tattfill) = length_data(checked_byte_count).parse(i)?; + let (i, fcompress) = be_i32(i)?; + let (i, fbasketsize) = be_i32(i)?; + let (i, fentryoffsetlen) = be_i32(i)?; + let (i, fwritebasket) = be_i32(i)?; + let (i, fentrynumber) = be_i64(i)?; + let (i, foffset) = be_i32(i)?; + let (i, fmaxbaskets) = be_i32(i)?; + let (i, fsplitlevel) = be_i32(i)?; + let (i, fentries) = be_i64(i)?; + let (i, ffirstentry) = be_i64(i)?; + let (i, ftotbytes) = be_i64(i)?; + let (i, fzipbytes) = be_i64(i)?; + let (i, fbranches) = + length_value(checked_byte_count, tobjarray(tbranch_hdr, context))(i)?; + let (i, fleaves) = + length_value(checked_byte_count, tobjarray(TLeaf::parse_from_raw, context))(i)?; + let (i, fbaskets) = + length_value(checked_byte_count, + tobjarray(|_| |r: Raw<'s>| Ok((Raw { classinfo: r.classinfo, obj: &[] }, r.obj)), context))(i)?; + let (i, fbasketbytes) = be_u8.precedes(count(be_i32, fmaxbaskets as usize)).parse(i)?; + let (i, fbasketentry) = be_u8.precedes(count(be_i64, fmaxbaskets as usize)).parse(i)?; + let (i, fbasketseek) = be_u8.precedes(count(be_u64, fmaxbaskets as usize)).parse(i)?; + let (i, ffilename) = string(i)?; - let name = tnamed.name; - let fbaskets = fbaskets - .into_iter() - .filter(|s| !s.is_empty()) - .map(|s| Container::InMemory(s.to_vec())); - let nbaskets = 
fwritebasket as usize; - let fbasketbytes = fbasketbytes - .into_iter() - .take(nbaskets) - .map(|val| val as usize); - let fbasketentry = fbasketentry.into_iter().take(nbaskets).collect(); - let fbasketseek = fbasketseek.into_iter().take(nbaskets); - let source = if ffilename.is_empty() { - context.source.to_owned() - } else { - unimplemented!("Root files referencing other Root files is not implemented") - }; - let containers_disk = fbasketseek - .zip(fbasketbytes) - .map(|(seek, len)| Container::OnDisk(source.clone(), seek, len as u64)); - let containers = fbaskets.chain(containers_disk).collect(); - Ok(( - i, - TBranch { - name, - fcompress, - fbasketsize, - fentryoffsetlen, - fwritebasket, - fentrynumber, - foffset, - fsplitlevel, - fentries, - ffirstentry, - ftotbytes, - fzipbytes, - fbranches, - fleaves, - fbasketentry, - containers, - }, - )) + let name = tnamed.name; + let fbaskets = fbaskets + .into_iter() + .filter(|s| !s.is_empty()) + .map(|s| Container::InMemory(s.to_vec())); + let nbaskets = fwritebasket as usize; + let fbasketbytes = fbasketbytes + .into_iter() + .take(nbaskets) + .map(|val| val as usize); + let fbasketentry = fbasketentry.into_iter().take(nbaskets).collect(); + let fbasketseek = fbasketseek.into_iter().take(nbaskets); + let source = if ffilename.is_empty() { + context.source.to_owned() + } else { + unimplemented!("Root files referencing other Root files is not implemented") + }; + let containers_disk = fbasketseek + .zip(fbasketbytes) + .map(|(seek, len)| Container::OnDisk(source.clone(), seek, len as u64)); + let containers = fbaskets.chain(containers_disk).collect(); + Ok(( + i, + TBranch { + name, + fcompress, + fbasketsize, + fentryoffsetlen, + fwritebasket, + fentrynumber, + foffset, + fsplitlevel, + fentries, + ffirstentry, + ftotbytes, + fzipbytes, + fbranches, + fleaves, + fbasketentry, + containers, + }, + )) + } } diff --git a/root-io/src/tree_reader/container.rs b/root-io/src/tree_reader/container.rs index 397d93e..04373f6 
100644 --- a/root-io/src/tree_reader/container.rs +++ b/root-io/src/tree_reader/container.rs @@ -1,9 +1,12 @@ -use failure::Error; -use nom::combinator::rest; -use nom::number::complete::*; use nom::*; +use nom::combinator::rest; +use nom::number::complete::{be_i8, be_u16}; +use nom::number::streaming::be_u32; +use nom::sequence::tuple; +use nom_supreme::ParserExt; use crate::core::*; +use crate::tree_reader::ReadError; #[derive(Debug, Clone)] pub(crate) enum Container { @@ -13,17 +16,17 @@ pub(crate) enum Container { OnDisk(Source, u64, u64), } + impl Container { /// Return the number of entries and the data; reading it from disk if necessary - pub(crate) async fn raw_data(self) -> Result<(u32, Vec), Error> { + pub(crate) async fn raw_data<'s>(self) -> Result<(u32, Vec), ReadError> { let buf = match self { Container::InMemory(buf) => buf, Container::OnDisk(source, seek, len) => source.fetch(seek, len).await?, }; - match tbasket2vec(buf.as_slice()) { - Ok((_, v)) => Ok(v), - _ => Err(format_err!("tbasket2vec parser failed")), - } + + let res = wrap_parser(tbasket2vec)(buf.as_slice())?; + Ok(res) } // /// For debugging: Try to find the file of this container. Out of luck if the container was inlined // pub(crate) fn file(&self) -> Option { @@ -37,56 +40,63 @@ impl Container { /// Return a tuple indicating the number of elements in this basket /// and the content as a Vec -#[rustfmt::skip::macros(do_parse)] -fn tbasket2vec(input: &[u8]) -> IResult<&[u8], (u32, Vec)> { - do_parse!(input, - hdr: tkey_header >> - _ver: be_u16 >> - _buf_size: be_u32 >> - _entry_size: be_u32 >> - n_entry_buf: be_u32 >> - last: be_u32 >> - _flag: be_i8 >> - buf: rest >> - ({ - let buf = if hdr.uncomp_len as usize > buf.len() { - decompress(buf).unwrap().1 - } else { - buf.to_vec() - }; - // Not the whole buffer is filled, no, no, no, that - // would be to easy! Its only filled up to `last`, - // whereby we have to take the key_len into account... 
- let useful_bytes = (last - hdr.key_len as u32) as usize; - (n_entry_buf, buf.as_slice()[..useful_bytes].to_vec()) - })) +fn tbasket2vec<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], (u32, Vec), E> + where + E : RootError<&'s [u8]> +{ + tuple(( + tkey_header.context("header"), + be_u16.context("version"), + be_u32.context("buffer size"), + be_u32.context("entry size"), + be_u32.context("number of entries in buffer"), + be_u32.context("last"), + be_i8.context("flags"), + rest.context("buffer") + )).map_res::<_, _, DecompressionError>(|(hdr, _, _, _, n_entry_buf, last, _, buf)| { + let buf = if hdr.uncomp_len as usize > buf.len() { + decompress(buf)? + } else { + buf.to_vec() + }; + // Not the whole buffer is filled, no, no, no, that + // would be to easy! Its only filled up to `last`, + // whereby we have to take the key_len into account... + let useful_bytes = (last - hdr.key_len as u32) as usize; + Ok((n_entry_buf, buf.as_slice()[..useful_bytes].to_vec())) + }).context("tbasket2vec").parse(input) } #[cfg(test)] mod tests { - use crate::core::tkey_header; use nom::*; + use std::fs::File; use std::io::{BufReader, Read, Seek, SeekFrom}; + use crate::core::tkey_header; + use crate::core::wrap_parser; + use crate::tree_reader::ReadError; + use super::tbasket2vec; #[test] - fn basket_simple() { + fn basket_simple() -> Result<(), ReadError> { let path = "./src/test_data/simple.root"; - let f = File::open(&path).unwrap(); + let f = File::open(&path)?; let mut reader = BufReader::new(f); // Go to first basket - reader.seek(SeekFrom::Start(218)).unwrap(); + reader.seek(SeekFrom::Start(218))?; // size from fbasketbytes let mut buf = vec![0; 86]; // let mut buf = vec![0; 386]; - reader.read_exact(&mut buf).unwrap(); + reader.read_exact(&mut buf)?; println!("{}", buf.to_hex(16)); - println!("{:?}", tkey_header(&buf)); + println!("{:?}", wrap_parser(tkey_header)(&buf)?); // println!("{:#?}", tbasket(&buf, be_u32)); - println!("{:#?}", tbasket2vec(&buf)); + println!("{:#?}", 
wrap_parser(tbasket2vec)(&buf)?); + Ok(()) } // /// Test the first basket of the "Tracks.fP[5]" branch diff --git a/root-io/src/tree_reader/leafs.rs b/root-io/src/tree_reader/leafs.rs index cd076fe..9718c54 100644 --- a/root-io/src/tree_reader/leafs.rs +++ b/root-io/src/tree_reader/leafs.rs @@ -1,15 +1,11 @@ -use std::fmt::Debug; - -use nom::{ - combinator::{map_res, verify}, - error::ParseError, - multi::length_value, - number::complete::*, - IResult, -}; - +use nom::{combinator::verify, error::ParseError, IResult, multi::length_value, number::complete::*, Parser}; +use nom::branch::alt; +use nom::sequence::tuple; +use nom_supreme::ParserExt; use quote::{Ident, Tokens}; +use std::fmt::Debug; + use crate::{code_gen::rust::ToRustType, core::*}; /// Parse a bool from a big endian u8 @@ -26,25 +22,25 @@ pub struct TLeaf { impl TLeaf { pub fn parse<'s, E>( - i: &'s [u8], + raw: Raw<'s>, context: &'s Context, - c_name: &str, - ) -> IResult<&'s [u8], Self, E> - where - E: ParseError<&'s [u8]> + Debug, + ) -> IResult, Self, E> + where + E: RootError<&'s [u8]>, { - TLeafVariant::parse(i, context, c_name).map(|(i, var)| (i, Self { variant: var })) + TLeafVariant::parse(raw, context).map(|(i, var)| (i, Self { variant: var })) } // A helper function to get around some lifetime issues on the caller sider pub(crate) fn parse_from_raw<'s, E>( - raw: &Raw<'s>, ctxt: &'s Context, - ) -> IResult<&'s [u8], Self, E> - where - E: ParseError<&'s [u8]> + Debug, + ) -> impl Parser, Self, E> + where + E: RootError<&'s [u8]>, { - Self::parse(raw.obj, ctxt, raw.classinfo) + move |raw: Raw<'s>| { + Self::parse(raw, ctxt) + } } } @@ -63,22 +59,24 @@ enum TLeafVariant { } impl TLeafVariant { - fn parse<'s, E>(i: &'s [u8], context: &'s Context, c_name: &str) -> IResult<&'s [u8], Self, E> - where - E: ParseError<&'s [u8]> + Debug, + fn parse<'s, E>(raw: Raw<'s>, context: &'s Context) -> IResult, Self, E> + where + E: RootError<&'s [u8]> + Debug, { - match c_name { - "TLeafB" => TLeafB::parse(i, 
context).map(|(i, l)| (i, TLeafVariant::TLeafB(l))), - "TLeafS" => TLeafS::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafS(l))), - "TLeafI" => TLeafI::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafI(l))), - "TLeafL" => TLeafL::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafL(l))), - "TLeafF" => TLeafF::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafF(l))), - "TLeafD" => TLeafD::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafD(l))), - "TLeafC" => TLeafC::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafC(l))), - "TLeafO" => TLeafO::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafO(l))), - "TLeafD32" => TLeafD32::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafD32(l))), + let i = raw.obj; + let mk_raw = |i| Raw { obj: i, classinfo: raw.classinfo }; + match raw.classinfo { + "TLeafB" => TLeafB::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafB(l))), + "TLeafS" => TLeafS::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafS(l))), + "TLeafI" => TLeafI::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafI(l))), + "TLeafL" => TLeafL::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafL(l))), + "TLeafF" => TLeafF::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafF(l))), + "TLeafD" => TLeafD::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafD(l))), + "TLeafC" => TLeafC::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafC(l))), + "TLeafO" => TLeafO::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafO(l))), + "TLeafD32" => TLeafD32::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafD32(l))), "TLeafElement" => { - TLeafElement::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafElement(l))) + TLeafElement::parse(context).map(TLeafVariant::TLeafElement).parse(i).map(|(i, l)| (mk_raw(i), l)) } name => unimplemented!("Unexpected Leaf type {}", name), } @@ -101,12 +99,12 @@ macro_rules! 
make_tleaf_variant { impl $struct_name { fn parse<'s, E>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Self, E> where - E: ParseError<&'s [u8]> + Debug, + E: RootError<&'s [u8]>, { // All known descendens have version 1 let (i, _) = verify(be_u16, |&ver| ver == 1)(i)?; let (i, base) = - length_value(checked_byte_count, |i| TLeafBase::parse(i, context))(i)?; + length_value(checked_byte_count, TLeafBase::parse(context))(i)?; let (i, fminimum) = $parser(i)?; let (i, fmaximum) = $parser(i)?; let obj = Self { @@ -148,19 +146,21 @@ make_tleaf_variant! {TLeafD32, f32, be_f32} struct TLeafElement { base: TLeafBase, fid: i32, - ftype: TypeID, + ftype: TypeId, } impl TLeafElement { - fn parse<'s, E>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Self, E> - where - E: ParseError<&'s [u8]> + Debug, + fn parse<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Self, E> + where + E: RootError<&'s [u8]>, { - let (i, _) = verify(be_u16, |&ver| ver == 1)(i)?; - let (i, base) = length_value(checked_byte_count, |i| TLeafBase::parse(i, context))(i)?; - let (i, fid) = be_i32(i)?; - let (i, ftype) = map_res(be_i32, TypeID::new)(i)?; - Ok((i, Self { base, fid, ftype })) + be_u16.verify(|&ver| ver == 1).precedes( + tuple(( + length_value(checked_byte_count, TLeafBase::parse(context)), + be_i32, + be_i32.map_res(TypeId::new) + )).map(make_fn(|(base, fid, ftype)| Self { base, fid, ftype })) + ).context("TLeaf") } } @@ -186,42 +186,40 @@ struct TLeafBase { } impl TLeafBase { - fn parse<'s, E>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Self, E> - where - E: ParseError<&'s [u8]> + std::fmt::Debug, + fn parse<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Self, E> + where + E: RootError<&'s [u8]>, { - let (i, ver) = be_u16(i)?; - let (i, tnamed) = length_value(checked_byte_count, tnamed)(i)?; - let (i, flen) = be_i32(i)?; - let (i, flentype) = be_i32(i)?; - let (i, foffset) = be_i32(i)?; - let (i, fisrange) = be_bool(i)?; - let (i, fisunsigned) = 
be_bool(i)?; - let (i, fleafcount) = { - if peek!(i, be_u32)?.1 == 0 { - // Consume the bytes but we have no nested leafs - be_u32(i).map(|(i, _)| (i, None))? - } else { - let (i, r) = raw(i, context)?; - // We don't parse from the input buffer. TODO: Check - // that we consumed all bytes - let (_, tleaf) = TLeafVariant::parse(r.obj, context, r.classinfo)?; - (i, Some(Box::new(tleaf))) - } - }; - Ok(( - i, - Self { - ver, - tnamed, - flen, - flentype, - foffset, - fisrange, - fisunsigned, - fleafcount, - }, - )) + move |i| { + let (i, ver) = be_u16(i)?; + let (i, tnamed) = length_value(checked_byte_count, tnamed)(i)?; + let (i, flen) = be_i32(i)?; + let (i, flentype) = be_i32(i)?; + let (i, foffset) = be_i32(i)?; + let (i, fisrange) = be_bool(i)?; + let (i, fisunsigned) = be_bool(i)?; + let (i, fleafcount): (&'s [u8], Option>) = { + alt(( + be_u32.verify(|&v| v == 0).map(|_| None), + raw(context) + .and_then(|r: Raw<'s>| TLeafVariant::parse(r, context)) + .map(|leaf| Some(Box::new(leaf))) + )).parse(i)? + }; + Ok(( + i, + Self { + ver, + tnamed, + flen, + flentype, + foffset, + fisrange, + fisunsigned, + fleafcount, + }, + )) + } } } diff --git a/root-io/src/tree_reader/mod.rs b/root-io/src/tree_reader/mod.rs index a33215a..fbcbbbc 100644 --- a/root-io/src/tree_reader/mod.rs +++ b/root-io/src/tree_reader/mod.rs @@ -4,19 +4,51 @@ //! several elements per collision. This module provides two Iterator //! structs in order to iterate over these columns (`TBranches` in //! ROOT lingo). 
+use nom::error::VerboseError; +use thiserror::Error; + +use crate::core::DecompressionError; +use crate::tree_reader::ReadError::ParseError; + +pub use self::tree::{Tree, ttree}; mod branch; mod container; mod leafs; -mod tree; +pub mod tree; + +#[derive(Error, Debug)] +pub enum ReadError { + #[error("Error reading data")] + IoError(#[from] std::io::Error), + #[error("Error fetching data from online source")] + ReqwestError(#[from] reqwest::Error), + #[error("Error decompressing data")] + DecompressionError(#[from] DecompressionError), + #[error("Error parsing data")] + ParseError(VerboseError>), +} -pub use self::tree::{ttree, Tree}; +impl From>> for ReadError { + fn from(e: VerboseError>) -> ReadError { + ParseError(e) + } +} + +#[derive(Error, Debug)] +pub enum WriteError { + #[error(transparent)] + ReadError(#[from] ReadError), + #[error(transparent)] + FmtError(#[from] std::fmt::Error) +} #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { - use std::path::PathBuf; use tokio; + use std::path::PathBuf; + use crate::core::RootFile; #[tokio::test] diff --git a/root-io/src/tree_reader/tree.rs b/root-io/src/tree_reader/tree.rs index 03c611c..833412c 100644 --- a/root-io/src/tree_reader/tree.rs +++ b/root-io/src/tree_reader/tree.rs @@ -1,20 +1,20 @@ +use nom::branch::alt; +use nom::combinator::{cond, verify}; +use nom::multi::{count, length_value}; +use nom::multi::length_data; +use nom::number::complete::{be_f64, be_i32, be_i64, be_u16, be_u32, be_u8}; +use nom::Parser; +use nom::sequence::preceded; +use nom_supreme::ParserExt; +use thiserror::Error; + use std::fmt; use std::fmt::Debug; use std::ops::Deref; -use failure::Error; -use nom::{ - combinator::{cond, verify}, - error::ParseError, - multi::{count, length_value}, - number::complete::*, - sequence::preceded, - IResult, -}; - use crate::{ - core::parsers::*, core::types::*, tree_reader::branch::tbranch_hdr, - tree_reader::branch::TBranch, tree_reader::leafs::TLeaf, + core::parsers::*, core::types::*, 
tree_reader::branch::TBranch, + tree_reader::branch::tbranch_hdr, tree_reader::leafs::TLeaf, }; /// `TTree` potentially has members with very large `Vec` buffers @@ -90,6 +90,10 @@ pub struct Tree { fbranchref: Option, } +#[derive(Error, Debug)] +#[error("No branch named {0} (available: {1:?})")] +pub struct MissingBranch(String, Vec); + impl<'s> Tree { /// Get all branches of a tree (including nested ones) pub(crate) fn branches(&self) -> Vec<&TBranch> { @@ -108,120 +112,113 @@ impl<'s> Tree { .collect() } - pub fn branch_by_name(&self, name: &str) -> Result<&TBranch, Error> { + pub fn branch_by_name(&self, name: &str) -> Result<&TBranch, MissingBranch> { self.branches() .into_iter() .find(|b| b.name == name) - .ok_or_else(|| { - format_err!( - "Branch {} not found in tree: \n {:#?}", - name, - self.branches() - .iter() - .map(|b| b.name.to_owned()) - .collect::>() - ) - }) + .ok_or_else(|| MissingBranch(name.to_string(), + self.branches() + .iter() + .map(|b| b.name.to_string()) + .collect::>())) } } /// Parse a `Tree` from the given buffer. Usually used through `FileItem::parse_with`. 
#[allow(clippy::unnecessary_unwrap)] -pub fn ttree<'s, E>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Tree, E> -where - E: ParseError<&'s [u8]> + Debug, +pub fn ttree<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Tree, E> + where + E: RootError<&'s [u8]>, { - let _curried_raw = |i| raw(i, context); - let none_or_u8_buf = |i: &'s [u8]| { - switch!(i, peek!(be_u32), - 0 => map!(call!(be_u32), | _ | None) | - _ => map!( - map!(call!(_curried_raw), |r| r.obj.to_vec()), - Some) - ) - }; - let grab_checked_byte_count = |i| length_data!(i, checked_byte_count); - let (i, ver) = verify(be_u16, |v| [16, 17, 18, 19].contains(v))(i)?; - let (i, tnamed) = length_value(checked_byte_count, tnamed)(i)?; - let (i, _tattline) = grab_checked_byte_count(i)?; - let (i, _tattfill) = grab_checked_byte_count(i)?; - let (i, _tattmarker) = grab_checked_byte_count(i)?; - let (i, fentries) = be_i64(i)?; - let (i, ftotbytes) = be_i64(i)?; - let (i, fzipbytes) = be_i64(i)?; - let (i, fsavedbytes) = be_i64(i)?; - let (i, fflushedbytes) = cond(ver >= 18, be_i64)(i)?; - let (i, fweight) = be_f64(i)?; - let (i, ftimerinterval) = be_i32(i)?; - let (i, fscanfield) = be_i32(i)?; - let (i, fupdate) = be_i32(i)?; - let (i, _fdefaultentryoffsetlen) = cond(ver >= 17, be_i32)(i)?; - let (i, fnclusterrange) = cond(ver >= 19, be_i32)(i)?; - let (i, fmaxentries) = be_i64(i)?; - let (i, fmaxentryloop) = be_i64(i)?; - let (i, _fmaxvirtualsize) = be_i64(i)?; - let (i, _fautosave) = be_i64(i)?; - let (i, _fautoflush) = cond(ver >= 18, be_i64)(i)?; - let (i, festimate) = be_i64(i)?; - let (i, _fclusterrangeend) = { - if let Some(n_clst_range) = fnclusterrange { - preceded(be_u8, count(be_i64, n_clst_range as usize))(i) - .map(|(i, ends)| (i, Some(ends)))? - } else { - (i, None) - } - }; - let (i, _fclustersize) = { - if let Some(n_clst_range) = fnclusterrange { - preceded(be_u8, count(be_i64, n_clst_range as usize))(i) - .map(|(i, ends)| (i, Some(ends)))? 
- } else { - (i, None) - } - }; - let (i, fbranches) = - length_value(checked_byte_count, |i| tobjarray(tbranch_hdr, i, context))(i)?; - let (i, fleaves) = length_value(checked_byte_count, |i| { - tobjarray(TLeaf::parse_from_raw, i, context) - })(i)?; + move |i| { + let none_or_u8_buf = |i: &'s [u8]| { + alt(( + be_u32.verify(|&v| v == 0).precedes(be_u32.map(|_| None).cut()), + be_u32.verify(|&v| v != 0).precedes(raw(context)).map(|r| Some(r.obj.to_vec())) + )).parse(i) + }; + let (i, ver) = verify(be_u16, |v| [16, 17, 18, 19].contains(v))(i)?; + let (i, tnamed) = length_value(checked_byte_count, tnamed)(i)?; + let (i, _tattline) = length_data(checked_byte_count)(i)?; + let (i, _tattfill) = length_data(checked_byte_count)(i)?; + let (i, _tattmarker) = length_data(checked_byte_count)(i)?; + let (i, fentries) = be_i64(i)?; + let (i, ftotbytes) = be_i64(i)?; + let (i, fzipbytes) = be_i64(i)?; + let (i, fsavedbytes) = be_i64(i)?; + let (i, fflushedbytes) = cond(ver >= 18, be_i64)(i)?; + let (i, fweight) = be_f64(i)?; + let (i, ftimerinterval) = be_i32(i)?; + let (i, fscanfield) = be_i32(i)?; + let (i, fupdate) = be_i32(i)?; + let (i, _fdefaultentryoffsetlen) = cond(ver >= 17, be_i32)(i)?; + let (i, fnclusterrange) = cond(ver >= 19, be_i32)(i)?; + let (i, fmaxentries) = be_i64(i)?; + let (i, fmaxentryloop) = be_i64(i)?; + let (i, _fmaxvirtualsize) = be_i64(i)?; + let (i, _fautosave) = be_i64(i)?; + let (i, _fautoflush) = cond(ver >= 18, be_i64)(i)?; + let (i, festimate) = be_i64(i)?; + let (i, _fclusterrangeend) = { + if let Some(n_clst_range) = fnclusterrange { + preceded(be_u8, count(be_i64, n_clst_range as usize))(i) + .map(|(i, ends)| (i, Some(ends)))? + } else { + (i, None) + } + }; + let (i, _fclustersize) = { + if let Some(n_clst_range) = fnclusterrange { + preceded(be_u8, count(be_i64, n_clst_range as usize))(i) + .map(|(i, ends)| (i, Some(ends)))? 
+ } else { + (i, None) + } + }; + let (i, fbranches) = + length_value(checked_byte_count, tobjarray(tbranch_hdr, context))(i)?; + let (i, fleaves) = length_value(checked_byte_count, + tobjarray(TLeaf::parse_from_raw, context), + )(i)?; - let (i, faliases) = none_or_u8_buf(i)?; - let (i, findexvalues) = tarray(be_f64, i)?; - let (i, findex) = tarray(be_i32, i)?; - let (i, ftreeindex) = none_or_u8_buf(i)?; - let (i, ffriends) = none_or_u8_buf(i)?; - let (i, fuserinfo) = none_or_u8_buf(i)?; - let (i, fbranchref) = none_or_u8_buf(i)?; - let ftreeindex = ftreeindex.map(Pointer); - let ffriends = ffriends.map(Pointer); - let fuserinfo = fuserinfo.map(Pointer); - let fbranchref = fbranchref.map(Pointer); - Ok(( - i, - Tree { - ver, - tnamed, - fentries, - ftotbytes, - fzipbytes, - fsavedbytes, - fflushedbytes, - fweight, - ftimerinterval, - fscanfield, - fupdate, - fmaxentries, - fmaxentryloop, - festimate, - fbranches, - fleaves, - faliases, - findexvalues, - findex, - ftreeindex, - ffriends, - fuserinfo, - fbranchref, - }, - )) + let (i, faliases) = none_or_u8_buf(i)?; + let (i, findexvalues) = tarray(be_f64).parse(i)?; + let (i, findex) = tarray(be_i32).parse(i)?; + let (i, ftreeindex) = none_or_u8_buf(i)?; + let (i, ffriends) = none_or_u8_buf(i)?; + let (i, fuserinfo) = none_or_u8_buf(i)?; + let (i, fbranchref) = none_or_u8_buf(i)?; + let ftreeindex = ftreeindex.map(Pointer); + let ffriends = ffriends.map(Pointer); + let fuserinfo = fuserinfo.map(Pointer); + let fbranchref = fbranchref.map(Pointer); + Ok(( + i, + Tree { + ver, + tnamed, + fentries, + ftotbytes, + fzipbytes, + fsavedbytes, + fflushedbytes, + fweight, + ftimerinterval, + fscanfield, + fupdate, + fmaxentries, + fmaxentryloop, + festimate, + fbranches, + fleaves, + faliases, + findexvalues, + findex, + ftreeindex, + ffriends, + fuserinfo, + fbranchref, + }, + )) + } } diff --git a/root-io/tests/high_level_io.rs b/root-io/tests/high_level_io.rs index ec8f152..1e2b20c 100644 --- 
a/root-io/tests/high_level_io.rs +++ b/root-io/tests/high_level_io.rs @@ -1,7 +1,7 @@ #![cfg(all(test, not(target_arch = "wasm32")))] -use std::path::PathBuf; use root_io::*; +use std::path::PathBuf; const TEST_FILES: &[&str] = &[ "./src/test_data/simple.root", diff --git a/root-io/tests/read_esd.rs b/root-io/tests/read_esd.rs index 013f7d6..883ad5a 100644 --- a/root-io/tests/read_esd.rs +++ b/root-io/tests/read_esd.rs @@ -1,8 +1,8 @@ -use failure::Error; use futures::prelude::*; use nom::number::complete::*; use nom::sequence::tuple; +use root_io::tree_reader::tree::MissingBranch; use root_io::{ core::parsers::{parse_custom_mantissa, parse_tobjarray_of_tnameds}, stream_zip, @@ -30,7 +30,7 @@ struct Model { } impl Model { - async fn stream_from_tree(t: &Tree) -> Result + '_, Error> { + async fn stream_from_tree(t: &Tree) -> Result + '_, MissingBranch> { let track_counter: Vec<_> = t .branch_by_name("Tracks")? .as_fixed_size_iterator(|i| be_u32(i)) @@ -109,10 +109,11 @@ impl Model { #[cfg(target_arch = "wasm32")] mod wasm { - use super::*; use reqwest::Url; use wasm_bindgen_test::*; + use super::*; + wasm_bindgen_test_configure!(run_in_browser); #[wasm_bindgen_test(async)] @@ -132,11 +133,10 @@ mod wasm { #[cfg(not(target_arch = "wasm32"))] mod x64 { use super::*; - use reqwest::Url; const REMOTE_FILE: &str = - "http://opendata.web.cern.ch/eos/opendata/alice/2010/LHC10h/000139038/ESD/0001/AliESDs.root"; + "http://opendata.web.cern.ch/eos/opendata/alice/2010/LHC10h/000139038/ESD/0001/AliESDs.root"; #[tokio::test] async fn read_esd_local_and_remote() { diff --git a/root-io/tests/read_simple.rs b/root-io/tests/read_simple.rs index 82c1670..f1856e0 100644 --- a/root-io/tests/read_simple.rs +++ b/root-io/tests/read_simple.rs @@ -1,10 +1,12 @@ -use std::pin::Pin; - -use failure::Error; use futures::{Stream, StreamExt}; use nom::number::complete::*; +use nom::Parser; -use root_io::{core::parsers::string, stream_zip, tree_reader::Tree, RootFile}; +use root_io::{ + 
core::parsers::string, stream_zip, tree_reader::tree::MissingBranch, tree_reader::Tree, + RootFile, +}; +use std::pin::Pin; /// A model for the (or a subset) of the data. /// This is the object which contains the data of one "event" @@ -17,14 +19,14 @@ struct Model { } impl Model { - fn stream_from_tree(t: Tree) -> Result>>, Error> { + fn stream_from_tree(t: Tree) -> Result>>, MissingBranch> { Ok(stream_zip!( t.branch_by_name("one")? .as_fixed_size_iterator(|i| be_i32(i)), t.branch_by_name("two")? .as_fixed_size_iterator(|i| be_f32(i)), t.branch_by_name("three")? - .as_fixed_size_iterator(|i| string(i)) + .as_fixed_size_iterator(|i| string.map(|s| s.to_string()).parse(i)) ) .map(|(one, two, three)| Self { one, two, three }) .boxed_local()) @@ -55,11 +57,12 @@ mod x64 { #[cfg(all(test, target_arch = "wasm32"))] mod wasm { - wasm_bindgen_test_configure!(run_in_browser); - use super::*; use reqwest::Url; use wasm_bindgen_test::{wasm_bindgen_test, wasm_bindgen_test_configure}; + use super::*; + + wasm_bindgen_test_configure!(run_in_browser); #[wasm_bindgen_test] async fn read_simple_remote() { let url = Url::parse("http://127.0.0.1:3030/github/cbourjau/alice-rs/master/root-io/src/test_data/simple.root").unwrap(); diff --git a/root-ls/Cargo.toml b/root-ls/Cargo.toml index cf62504..53546ae 100644 --- a/root-ls/Cargo.toml +++ b/root-ls/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "root-ls" -version = "0.2.0" +version = "0.2.1" authors = ["cbourjau "] description = "CLI tool to inspect the content and layout of `.root` files" repository = "https://github.com/cbourjau/alice-rs" @@ -14,7 +14,7 @@ edition = "2018" [dependencies] clap = "2" failure = "0.1" -root-io = { version="0.3", path="../root-io" } +root-io = { version="0.3.1", path="../root-io" } prettyplease = "0.1" syn = "1" tokio = { version = "1", features = ["full"] } diff --git a/root-ls/src/main.rs b/root-ls/src/main.rs index 506c714..46759a3 100644 --- a/root-ls/src/main.rs +++ b/root-ls/src/main.rs @@ -1,5 +1,6 
@@ use std::env; - +//use std::io::Write; +//use std::process::{Command, Stdio}; use std::path::Path; use clap::{crate_version, value_t, App, AppSettings, Arg, ArgMatches, SubCommand}; From 2b14b97f85b8d489b07ce42757e0b04c13f026ad Mon Sep 17 00:00:00 2001 From: lschuetze Date: Wed, 2 Mar 2022 15:34:42 +0100 Subject: [PATCH 2/7] Bug fixes and polishing --- .gitignore | 1 - malice/src/event.rs | 8 +- root-io/Cargo.toml | 2 + root-io/src/core/data_source.rs | 2 +- root-io/src/core/file.rs | 121 ++++--- root-io/src/core/file_item.rs | 21 +- root-io/src/core/mod.rs | 41 +++ root-io/src/core/parsers.rs | 455 ++++++++++++++++++--------- root-io/src/core/tkey.rs | 22 +- root-io/src/core/tstreamer.rs | 221 +++++++------ root-io/src/core/tstreamerinfo.rs | 24 +- root-io/src/core/types.rs | 12 +- root-io/src/lib.rs | 1 + root-io/src/tests/basic_io.rs | 23 +- root-io/src/tree_reader/branch.rs | 78 +++-- root-io/src/tree_reader/container.rs | 13 +- root-io/src/tree_reader/leafs.rs | 85 +++-- root-io/src/tree_reader/mod.rs | 38 +-- root-io/src/tree_reader/tree.rs | 78 +++-- root-io/tests/high_level_io.rs | 8 +- 20 files changed, 734 insertions(+), 520 deletions(-) diff --git a/.gitignore b/.gitignore index 95a6c32..4b03de8 100644 --- a/.gitignore +++ b/.gitignore @@ -5,5 +5,4 @@ *tar.gz *.clang_complete *.root - .idea/ diff --git a/malice/src/event.rs b/malice/src/event.rs index de7586c..e121274 100644 --- a/malice/src/event.rs +++ b/malice/src/event.rs @@ -4,10 +4,10 @@ use std::fmt::Debug; use failure::Error; use futures::prelude::*; use itertools::izip; -use nom::{combinator::map, number::complete::*, sequence::tuple, IResult}; +use nom::{combinator::map, number::complete::*, sequence::tuple}; use wasm_bindgen::prelude::*; -use root_io::core::parsers::{parse_custom_mantissa, parse_tobjarray_of_tnameds, RootError}; +use root_io::core::parsers::{Span, RResult, parse_custom_mantissa, parse_tobjarray_of_tnameds, RootError}; use root_io::stream_zip; use root_io::tree_reader::Tree; 
@@ -241,9 +241,9 @@ fn string_to_mask(s: &str, run_number: i32) -> TriggerMask { } } -fn parse_pid_probabilities<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], PidProbabilities, E> +fn parse_pid_probabilities<'s, E>(input: Span<'s>) -> RResult<'s, PidProbabilities, E> where - E: RootError<&'s [u8]>, + E: RootError>, { let (input, electron) = parse_custom_mantissa(input, 8)?; let (input, muon) = parse_custom_mantissa(input, 8)?; diff --git a/root-io/Cargo.toml b/root-io/Cargo.toml index 1e48c65..22c863b 100644 --- a/root-io/Cargo.toml +++ b/root-io/Cargo.toml @@ -24,7 +24,9 @@ uuid = "0.8.2" lz4-compress = "0.1.1" nom = "7" nom-supreme = "0.6" +nom_locate = "4.0.0" thiserror = "1" +ouroboros = "0.14" [target.'cfg(target_arch = "wasm32")'.dependencies] wasm-bindgen-futures = "0.4" diff --git a/root-io/src/core/data_source.rs b/root-io/src/core/data_source.rs index e096f36..c351087 100644 --- a/root-io/src/core/data_source.rs +++ b/root-io/src/core/data_source.rs @@ -9,7 +9,7 @@ use std::io::{Read, Seek, SeekFrom}; use std::path::Path; use std::path::PathBuf; -use crate::tree_reader::ReadError; +use crate::core::ReadError; /// The source from where the Root file is read. Construct it using /// `.into()` on a `Url` or `Path`. 
The latter is not availible for diff --git a/root-io/src/core/file.rs b/root-io/src/core/file.rs index 90388a5..f2a8d7d 100644 --- a/root-io/src/core/file.rs +++ b/root-io/src/core/file.rs @@ -1,5 +1,4 @@ use nom::{self, - IResult, number::complete::{be_i16, be_i32, be_u128, be_u16, be_u32, be_u64, be_u8}, Parser}; use nom::sequence::tuple; use nom_supreme::{ParserExt, tag::complete::tag}; @@ -13,7 +12,7 @@ use crate::{ core::tstreamer::streamers, MAP_OFFSET, }; -use crate::tree_reader::{ReadError, WriteError}; +use crate::core::{ReadError, WriteError}; /// Size of serialized `FileHeader` in bytes const FILE_HEADER_SIZE: u64 = 75; @@ -60,57 +59,60 @@ pub struct Directory { } /// Parse opening part of a root file -fn file_header<'s, E: RootError<&'s [u8]>>(i: &'s [u8]) -> IResult<&'s [u8], FileHeader, E> { - fn version_dep_int<'s, E: RootError<&'s [u8]>>(i: &'s [u8], is_64_bit: bool) -> IResult<&'s [u8], u64, E> { - if is_64_bit { - be_u64(i) - } else { - let (i, end) = be_u32(i)?; - Ok((i, end as u64)) +fn file_header<'s, E: RootError>>(i: Span<'s>) -> RResult<'s, FileHeader, E> { + let parser = |i| { + fn version_dep_int<'s, E: RootError>>(i: Span<'s>, is_64_bit: bool) -> RResult<'s, u64, E> { + if is_64_bit { + be_u64(i) + } else { + let (i, end) = be_u32(i)?; + Ok((i, end as u64)) + } } - } - let (i, _) = tag("root")(i)?; - let (i, version) = be_i32(i)?; - let is_64_bit = version > 1000000; - let (i, begin) = be_i32(i)?; - let (i, end) = version_dep_int(i, is_64_bit)?; - let (i, seek_free) = version_dep_int(i, is_64_bit)?; - let (i, nbytes_free) = be_i32(i)?; - let (i, n_entries_free) = be_i32(i)?; - let (i, n_bytes_name) = be_i32(i)?; - let (i, pointer_size) = be_u8(i)?; - let (i, compression) = be_i32(i)?; - let (i, seek_info) = version_dep_int(i, is_64_bit)?; - let (i, nbytes_info) = be_i32(i)?; - let (i, _uuid_version) = be_u16(i)?; - let (i, uuid) = be_u128(i)?; - - let uuid = Uuid::from_u128(uuid); - let seek_dir = (begin + n_bytes_name) as u64; - Ok(( - 
i, - FileHeader { - version, - begin, - end, - seek_free, - nbytes_free, - n_entries_free, - n_bytes_name, - pointer_size, - compression, - seek_info, - nbytes_info, - uuid, - seek_dir, - }, - )) + let (i, _) = tag("root")(i)?; + let (i, version) = be_i32(i)?; + let is_64_bit = version > 1000000; + let (i, begin) = be_i32(i)?; + let (i, end) = version_dep_int(i, is_64_bit)?; + let (i, seek_free) = version_dep_int(i, is_64_bit)?; + let (i, nbytes_free) = be_i32(i)?; + let (i, n_entries_free) = be_i32(i)?; + let (i, n_bytes_name) = be_i32(i)?; + let (i, pointer_size) = be_u8(i)?; + let (i, compression) = be_i32(i)?; + let (i, seek_info) = version_dep_int(i, is_64_bit)?; + let (i, nbytes_info) = be_i32(i)?; + let (i, _uuid_version) = be_u16(i)?; + let (i, uuid) = be_u128(i)?; + + let uuid = Uuid::from_u128(uuid); + let seek_dir = (begin + n_bytes_name) as u64; + Ok(( + i, + FileHeader { + version, + begin, + end, + seek_free, + nbytes_free, + n_entries_free, + n_bytes_name, + pointer_size, + compression, + seek_info, + nbytes_info, + uuid, + seek_dir, + }, + )) + }; + parser.context("file header").parse(i) } /// Parse a file-pointer based on the version of the file -fn versioned_pointer<'s, E>(version: i16) -> impl nom::Parser<&'s [u8], u64, E> +fn versioned_pointer<'s, E>(version: i16) -> impl RParser<'s, u64, E> where - E: RootError<&'s [u8]> + E: RootError> { move |i| { if version > 1000 { @@ -122,9 +124,9 @@ fn versioned_pointer<'s, E>(version: i16) -> impl nom::Parser<&'s [u8], u64, E> } /// Directory within a root file; exists on ever file -fn directory<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], Directory, E> +fn directory<'s, E>(input: Span<'s>) -> RResult<'s, Directory, E> where - E: RootError<&'s [u8]> + E: RootError> { tuple(( be_i16.context("directory version"), @@ -158,7 +160,7 @@ impl RootFile { pub async fn new>(source: S) -> Result { let source = source.into(); let hdr_buf = source.fetch(0, FILE_HEADER_SIZE).await?; - let hdr = 
wrap_parser(file_header.all_consuming())(&hdr_buf)?; + let hdr = wrap_parser(file_header.context("file header"))(&hdr_buf)?; //let hdr = _hdr?; // Jump to the TDirectory and parse it @@ -179,13 +181,11 @@ impl RootFile { } pub async fn get_streamer_context(&self) -> Result { - let seek_info_len = (self.hdr.nbytes_info + 4) as u64; - let info_key_buf = self - .source + let seek_info_len = (self.hdr.nbytes_info) as u64; + let info_key_buf = self.source .fetch(self.hdr.seek_info, seek_info_len) .await?; - let info_key = wrap_parser(tkey.all_consuming())(&info_key_buf)?; - + let info_key = wrap_parser(tkey.all_consuming().context("streamer info key"))(&info_key_buf)?; let key_len = info_key.hdr.key_len; Ok(Context { source: self.source.clone(), @@ -202,8 +202,7 @@ impl RootFile { /// Get the stream info of this file pub async fn streamer_infos(&self) -> Result, ReadError> { let ctx = self.get_streamer_context().await?; - let buf = ctx.s.as_slice(); - let res = wrap_parser(streamers(&ctx))(buf)?; + let res = wrap_parser_ctx(streamers)(&ctx)?; Ok(res) } @@ -326,7 +325,7 @@ mod test { .await .and_then(|buf| { file_header(&buf) - .map_err(|_| format_err!("Failed to parse file header")) + .map_err(|_| ParseError(e)) .map(|(_i, o)| o) }) .unwrap(); @@ -336,7 +335,7 @@ mod test { .await .and_then(|buf| { directory(&buf) - .map_err(|_| format_err!("Failed to parse file header")) + .map_err(|e| ParseError(e)) .map(|(_i, o)| o) }) .unwrap(); @@ -374,7 +373,7 @@ mod test { .await .and_then(|buf| { tkey(&buf) - .map_err(|_| format_err!("Failed to parse file header")) + .map_err(|e| ParseError(e)) .map(|(_i, o)| o) }) .unwrap(); diff --git a/root-io/src/core/file_item.rs b/root-io/src/core/file_item.rs index 8459b74..652be2b 100644 --- a/root-io/src/core/file_item.rs +++ b/root-io/src/core/file_item.rs @@ -3,7 +3,8 @@ use nom_supreme::ParserExt; use crate::core::{checked_byte_count, Context, Source, TKeyHeader, wrap_parser}; use crate::core::compression::decompress; -use 
crate::tree_reader::{ReadError, Tree, ttree}; +use crate::core::ReadError; +use crate::tree_reader::{Tree, ttree}; /// Describes a single item within this file (e.g. a `Tree`) #[derive(Debug)] @@ -61,8 +62,10 @@ impl FileItem { let ctx = self.get_context().await?; let buf = ctx.s.as_slice(); - let res = wrap_parser( - length_value(checked_byte_count, ttree(&ctx)).all_consuming() + let res = wrap_parser(length_value(checked_byte_count, ttree(&ctx)) + .complete() + .all_consuming() + .context("ttree wrapper") )(buf)?; Ok(res) } @@ -73,15 +76,15 @@ mod tests { use std::path::Path; use crate::core::RootFile; + use crate::core::UnwrapPrint; #[tokio::test] async fn open_simple() { let path = Path::new("./src/test_data/simple.root"); - let f = RootFile::new(path).await.expect("Failed to open file"); + let f = RootFile::new(path).await.unwrap_print(); assert_eq!(f.items().len(), 1); assert_eq!(f.items()[0].tkey_hdr.obj_name, "tree"); - // Only streamers; not rules - assert_eq!(f.streamer_infos().await.unwrap().len(), 18); + assert_eq!(f.streamer_infos().await.unwrap_print().len(), 18); } #[tokio::test] @@ -90,13 +93,11 @@ mod tests { use alice_open_data; let path = alice_open_data::test_file().unwrap(); - let f = RootFile::new(path.as_path()) - .await - .expect("Failed to open file"); + let f = RootFile::new(path.as_path()).await.unwrap_print(); assert_eq!(f.items().len(), 2); assert_eq!(f.items()[0].tkey_hdr.obj_name, "esdTree"); assert_eq!(f.items()[1].tkey_hdr.obj_name, "HLTesdTree"); - assert_eq!(f.streamer_infos().await.unwrap().len(), 87); + assert_eq!(f.streamer_infos().await.unwrap_print().len(), 87); } } diff --git a/root-io/src/core/mod.rs b/root-io/src/core/mod.rs index e06822d..35e0915 100644 --- a/root-io/src/core/mod.rs +++ b/root-io/src/core/mod.rs @@ -2,6 +2,7 @@ //! the self-description of a root file. These parsers can be used to //! build new parsers using the [root-ls](https://github.com/cbourjau/alice-rs) cli. 
use thiserror::Error; +use crate::core::ReadError::ParseError; pub(crate) use self::compression::*; pub use self::compression::DecompressionError; @@ -35,4 +36,44 @@ pub enum SemanticError { #[derive(Debug)] pub enum Component { TStreamerElement +} + +#[derive(Error, Debug)] +pub enum ReadError { + #[error("Error reading data")] + IoError(#[from] std::io::Error), + #[error("Error fetching data from online source")] + ReqwestError(#[from] reqwest::Error), + #[error("Error decompressing data")] + DecompressionError(#[from] DecompressionError), + #[error("Error parsing data")] + ParseError(VerboseErrorInfo), +} + +pub trait UnwrapPrint { + fn unwrap_print(self) -> T; +} + +impl UnwrapPrint for Result { + fn unwrap_print(self) -> T { + match self { + Ok(v) => v, + Err(ParseError(e)) => { panic!("Tried to unwrap a parse error:\n{}", e); }, + Err(e) => { panic!("Tried to unwrap a read error:\n{}", e) } + } + } +} + +impl From for ReadError { + fn from(e: VerboseErrorInfo) -> ReadError { + ParseError(e) + } +} + +#[derive(Error, Debug)] +pub enum WriteError { + #[error(transparent)] + ReadError(#[from] ReadError), + #[error(transparent)] + FmtError(#[from] std::fmt::Error) } \ No newline at end of file diff --git a/root-io/src/core/parsers.rs b/root-io/src/core/parsers.rs index 50572fa..5b3c8d2 100644 --- a/root-io/src/core/parsers.rs +++ b/root-io/src/core/parsers.rs @@ -1,10 +1,14 @@ -use nom::{self, bytes::complete::{take, take_until}, combinator::{map_res, rest}, error::ParseError, IResult, multi::{count, length_data, length_value}, number::complete::{be_i32, be_u16, be_u32, be_u8}, Parser, sequence::{pair, tuple}}; +use nom::{self, bytes::complete::{take, take_until}, combinator::rest, error::ParseError, IResult, multi::{count, length_data, length_value}, number::complete::{be_i32, be_u16, be_u32, be_u8}, Parser, sequence::{pair, tuple}}; use nom::branch::alt; -use nom::combinator::{cond, eof}; +use nom::combinator::cond; use nom::error::{ContextError, 
FromExternalError, VerboseError}; +use nom::HexDisplay; use nom::multi::length_count; +use nom::Slice; +use nom_locate::LocatedSpan; use nom_supreme::parser_ext::ParserExt; use nom_supreme::tag::TagError; +use ouroboros::self_referencing; /// Parsers of the ROOT core types. Note that objects in ROOT files /// are often, but not always, preceeded by their size. The parsers in @@ -38,23 +42,147 @@ impl + FromExternalError + Debug> RootError for T {} +pub type Span<'s> = LocatedSpan<&'s [u8]>; +pub type RResult<'s, O, E> = IResult, O, E>; + +pub trait RParser<'s, O, E: RootError>>: Parser, O, E> {} + +impl<'s, O, E: RootError>, T: Parser, O, E>> RParser<'s, O, E> for T {} + /// Corerce a closure to a Fn, for use with map_res et al. pub(crate) fn make_fn U>(f: F) -> F { f } +#[self_referencing] +#[derive(Debug)] +pub struct VerboseErrorInfo { + input: Vec, + #[borrows(input)] + #[covariant] + error: VerboseError>, +} + +impl std::fmt::Display for VerboseErrorInfo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { + use nom::error::VerboseErrorKind::*; + use nom::error::ErrorKind as Kind; + + writeln!(f, "Error while parsing this block of data:")?; + if self.borrow_input().len() > 0x100 { + write!(f, "{}", self.borrow_input()[..0x100].to_hex(16))?; + writeln!(f, " \t[0x{:x} of 0x{:x} bytes omitted]", self.borrow_input().len() - 0x100, self.borrow_input().len())?; + } else { + write!(f, "{}", self.borrow_input().to_hex(16))?; + } + + for (span, kind) in self.borrow_error().errors.iter().rev() { + match kind { + Context(context) => write!(f, "\nWhile trying to parse {}:", context)?, + Char(c) => write!(f, "\nWhile trying to match a '{}':", c)?, + Nom(Kind::Verify) => continue, + Nom(Kind::Complete) => { write!(f, "\nExpected length exceeds buffer")?; continue }, + Nom(Kind::Eof) => if span.fragment().is_empty() { + // Yes, EOF is returned both for parsers expecting more input (the be_uXX + // parsers for us, mostly), but also by parsers 
expecting *no more* input + // such as all_consuming. + // We distinguish based on the remaining input - if everything was consumed, + // it must have been a premature EOF + write!(f, "\nUnexpected EOF")? + } else { + write!(f, "\nExpected EOF, but found excess data")? + }, + Nom(kind) => write!(f, "\nIn {:?}:", kind)? + }; + + let fragment_begin = span.location_offset(); + let fragment_end = match kind { + Context(_) | Nom(_) => span.location_offset() + std::cmp::max(1, std::cmp::min(0x100, span.fragment().len())), + Char(_) => span.location_offset() + 1 + }; + // Align hexdump to 16-byte blocks + let hexdump_begin = fragment_begin / 16 * 16; + let hexdump_first_line_end = std::cmp::min(self.borrow_input().len(), hexdump_begin + 16); + let hexdump_end = (fragment_end + 16) / 16 * 16; + let hexdump_end = std::cmp::min(self.borrow_input().len(), hexdump_end); + + // 2 letters per byte + one space + let fragment_begin_in_dump = 3 * (fragment_begin % 16); + let fragment_end_in_dump = 3 * ((fragment_end - 1) % 16) + 1; + + write!(f, "\n{}", self.borrow_input()[hexdump_begin..hexdump_first_line_end].to_hex_from(16, hexdump_begin))?; + if fragment_begin == self.borrow_input().len() { + write!(f, " \t{: >skip$} [at end of input]", '^', skip=fragment_begin_in_dump + 1)?; + } else if fragment_begin / 16 == fragment_end / 16 { + write!(f, " \t{: >skip$}{:~>len$}", + '^', '~', + skip = fragment_begin_in_dump + 1, + len = fragment_end_in_dump - fragment_begin_in_dump)? 
+ } else { + write!(f, " \t{: >skip$}{:~>len$}", + '^', '~', + skip = fragment_begin_in_dump + 1, + len = (3 * 15 + 1) - fragment_begin_in_dump)?; + write!(f, "\n{}", self.borrow_input()[hexdump_begin + 16..hexdump_end].to_hex_from(16, hexdump_begin + 16))?; + if span.fragment().len() > 0x100 { + write!(f, " \t[0x{:x} bytes omitted]", span.fragment().len() - 0x100)?; + } else { + write!(f, " \t{:~>len$}", '~', len = fragment_end_in_dump + 1)?; + } + } + writeln!(f)?; + } + + Ok(()) + } +} -pub(crate) fn wrap_parser<'s, O>(parser: impl Parser<&'s [u8], O, VerboseError<&'s [u8]>>) -> impl FnMut(&'s [u8]) -> Result>> +fn reborrow_spans<'s, 't>(new_base: &'s [u8], error: VerboseError>) -> VerboseError> { + let reborrow = |span: &Span<'_>| unsafe { + Span::new_from_raw_offset(span.location_offset(), + span.location_line(), + &new_base[span.location_offset()..span.location_offset() + span.fragment().len()], + ()) + }; + VerboseError { + errors: error.errors.iter().map(|(span, kind)| (reborrow(span), kind.clone())).collect::>() + } +} + +pub fn wrap_parser<'s, O>(parser: impl Parser, O, VerboseError>>) -> impl FnMut(&'s [u8]) -> Result { let mut parser = parser.complete(); - move |input| match parser.parse(input) { + move |input| match parser.parse(Span::new(input)) { Ok((_, parsed)) => Ok(parsed), Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { - let errors = err.errors.iter().map(|(i, kind)| (i.to_vec(), kind.clone())).collect(); - Err(VerboseError { errors }) - }, + let info = VerboseErrorInfoBuilder { + input: input.to_vec(), + error_builder: |input| reborrow_spans(input, err), + }; + Err(info.build()) + } + Err(nom::Err::Incomplete(..)) => { + unreachable!("Complete combinator should make this impossible") + } + } +} + +pub fn wrap_parser_ctx<'s, O, F, P>(parser_gen: F) -> impl FnMut(&'s Context) -> Result + where + P: Parser, O, VerboseError>>, + F: Fn(&'s Context) -> P +{ + move |ctx| match parser_gen(ctx).complete().parse(ctx.span()) { + Ok((_, 
parsed)) => Ok(parsed), + Err(nom::Err::Error(err)) | Err(nom::Err::Failure(err)) => { + let info = VerboseErrorInfoBuilder { + input: ctx.s.to_vec(), + error_builder: |input| reborrow_spans(input, err), + }; + Err(info.build()) + } Err(nom::Err::Incomplete(..)) => { unreachable!("Complete combinator should make this impossible") } @@ -68,44 +196,44 @@ fn is_byte_count(v: &u32) -> bool { /// Return the size in bytes of the following object in the input. The /// count is the remainder of this object minus the size of the count. -pub fn checked_byte_count<'s, E>(input: &'s [u8]) -> IResult<&[u8], u32, E> +pub fn checked_byte_count<'s, E>(input: Span<'s>) -> RResult<'s, u32, E> where - E: RootError<&'s [u8]>, + E: RootError>, { - be_u32.verify(is_byte_count).context("does not match bytecount mask") - .map(|v| v & Flags::BYTE_COUNT_MASK.bits()) - .verify(|&v| v != 0).context("byte count must not be 0") - .verify(|&v| v < 0x4000_0000).context("highest bit in byte count must be unset") + be_u32.verify(is_byte_count).context("assertion: byte count matches bytecount mask") + .map(|v| v & !Flags::BYTE_COUNT_MASK.bits()) + .verify(|&v| v != 0).context("assertion: byte count must not be 0") + .verify(|&v| v < 0x8000_0000).context("assertion: highest bit in byte count must be unset") .parse(input) } /// Read ROOT's string length prefix, which is usually a u8, but can be extended /// to a u32 (for a total of 5 bytes) if the first byte is 255 -fn string_length_prefix<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], u32, E> +fn string_length_prefix<'s, E>(input: Span<'s>) -> RResult<'s, u32, E> where - E: RootError<&'s [u8]>, + E: RootError>, { alt(( - be_u8.verify(|&v| v == 255).precedes(be_u32).cut().context("extended string length prefix"), + be_u8.verify(|&v| v == 255).precedes(be_u32).context("extended string length prefix"), be_u8.verify(|&v| v != 255).map(|v| v as u32).context("short string length prefix") ))(input) } /// Read ROOT's version of short and long strings 
(preceeded by u8). Does not read null terminated! -pub fn string<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], &'s str, E> +pub fn string<'s, E>(input: Span<'s>) -> RResult<'s, &'s str, E> where - E: RootError<&'s [u8]>, + E: RootError>, { length_data(string_length_prefix) - .map_res(str::from_utf8) + .map_res(|s| str::from_utf8(&s)) .context("length-prefixed string") .parse(input) } /// Parser for the most basic of ROOT types -pub fn tobject<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], TObject, E> +pub fn tobject<'s, E>(input: Span<'s>) -> RResult<'s, TObject, E> where - E: RootError<&'s [u8]>, + E: RootError>, { tuple(( be_u16.context("tobject version"), @@ -120,40 +248,45 @@ pub fn tobject<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], TObject, E> } /// Parse a `TList` -pub fn tlist<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Vec>, E> +pub fn tlist<'s, E>(ctx: &'s Context) -> impl RParser<'s, Vec>, E> where - E: RootError<&'s [u8]>, + E: RootError>, { - RootContextParser { - context, - parser: |ctx, inpt| { - let (i, _ver) = be_u16.context("tlist version") - .verify(|&v| v == 5).context("tlist version must be 5").parse(inpt)?; - let (i, (_tobj, _name, num_obj)) = tuple((tobject, string, be_i32))(i)?; - let (i, objs) = count( - |i| { - let (i, obj) = length_value(checked_byte_count, raw(ctx)) - .context("entry in tlist") - .parse(i)?; - // TODO verify remaining entry data - let (i, _) = length_data(be_u8)(i)?; - Ok((i, obj)) - }, - num_obj as usize, - )(i)?; - - // TODO: Verify rest - let (i, _) = rest(i)?; - Ok((i, objs)) - }, - }//.context("tlist") + let parser = move |inpt| { + let (i, _ver) = be_u16.context("tlist version") + .verify(|&v| v == 5).context("assertion: tlist version must be 5").parse(inpt)?; + let (i, (_tobj, _name, num_obj)) = tuple(( + tobject.context("tlist object header"), + string.context("tlist name"), + be_i32.context("tlist element count") + ))(i)?; + + let (i, objs) = count( + |i: Span<'s>| { + let (i, obj) = 
length_value(checked_byte_count, raw(ctx)) + .complete() + .context("length-prefixed data") + .parse(i)?; + // TODO verify remaining entry data + // TODO u8 prefix or extended string prefix? + let (i, _x) = length_data(be_u8).complete().parse(i)?; + Ok((i, obj)) + }, + num_obj as usize, + )(i)?; + + // TODO: Verify rest + let (i, _) = rest(i)?; + Ok((i, objs)) + }; + parser.context("tlist") } /// Parser for `TNamed` objects #[rustfmt::skip::macros(do_parse)] -pub fn tnamed<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], TNamed, E> +pub fn tnamed<'s, E>(input: Span<'s>) -> RResult<'s, TNamed, E> where - E: RootError<&'s [u8]>, + E: RootError>, { tuple(( be_u16.context("version"), @@ -166,30 +299,31 @@ pub fn tnamed<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], TNamed, E> } /// Parse a `TObjArray` -pub fn tobjarray<'s, E, F, P, O>(parser: F, context: &'s Context) -> impl Fn(&'s [u8]) -> IResult<&'s [u8], Vec, E> +pub fn tobjarray<'s, E, P, O>(parser: P) -> impl RParser<'s, Vec, E> where - F: Fn(&'s Context) -> P, - P: Parser, O, E>, - E: RootError<&'s [u8]>, + P: RParser<'s, O, E> + Copy, + E: RootError>, { - make_fn(move |i| { + let parser = move |i| { let (i, _ver) = be_u16(i)?; let (i, _tobj) = tobject(i)?; let (i, _name) = c_string(i)?; let (i, size) = be_i32(i)?; let (i, _low) = be_i32(i)?; - let (i, objs): (&'s [u8], Vec) = count( - raw(context).and_then(parser(context)), + let (i, objs): (_, Vec) = count( + parser, size as usize, )(i)?; Ok((i, objs)) - }) + }; + + parser.context("tobjarray") } /// Parse a `TObjArray` which does not have references pointing outside of the input buffer -pub fn tobjarray_no_context<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], Vec<(ClassInfo, &'s [u8])>, E> +pub fn tobjarray_no_context<'s, E>(input: Span<'s>) -> RResult<'s, Vec<(ClassInfo, Span<'s>)>, E> where - E: RootError<&'s [u8]>, + E: RootError>, { tuple(( be_u16.context("TObjArray header version"), @@ -204,49 +338,49 @@ pub fn tobjarray_no_context<'s, E>(input: 
&'s [u8]) -> nom::IResult<&'s [u8], Ve // |v| v.into_iter().map(|(ci, s)| (ci, s)).collect()) >> } -pub fn tobjstring<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], &'s str, E> +pub fn tobjstring<'s, E>(input: Span<'s>) -> RResult<'s, &'s str, E> where - E: RootError<&'s [u8]>, + E: RootError>, { + // TODO move all_consuming to call site tuple(( be_u16.context("tobjstring version"), tobject.context("tobjstring object"), string.context("tobjstring name"), - eof.context("tobjstring must consume input") - )).map(|(_, _, name, _)| name) + )).all_consuming() + .context("tobjstring") + .map(|(_, _, name)| name) .parse(input) } /// Parse a so-called `TArray`. Note that ROOT's `TArray`s are actually not fixed size. /// Example usage for TArrayI: `tarray(nom::complete::be_i32).parse(input_slice)` -pub fn tarray<'s, E, F, O>(parser: F) -> impl nom::Parser<&'s [u8], Vec, E> +pub fn tarray<'s, E, F, O>(parser: F) -> impl RParser<'s, Vec, E> where - F: Parser<&'s [u8], O, E>, - E: RootError<&'s [u8]>, + F: RParser<'s, O, E>, + E: RootError>, { length_count(be_u32, parser).context("tarray") } /// Parse a null terminated string -pub fn c_string<'s, E>(i: &'s [u8]) -> nom::IResult<&[u8], &str, E> +pub fn c_string<'s, E>(input: Span<'s>) -> RResult<'s, &str, E> where - E: RootError<&'s [u8]>, + E: RootError>, { - map_res( - take_until(b"\x00".as_ref()).terminated(be_u8.verify(|&v| v == 0)), - str::from_utf8, - ) + take_until(b"\x00".as_ref()).terminated(be_u8.verify(|&v| v == 0)) + .map_res(|s: Span| str::from_utf8(&s)) .context("c string") - .parse(i) + .parse(input) } /// Figure out the class we are looking at. 
The data might not be /// saved locally but rather in a reference to some other place in the /// buffer.This is modeled after ROOT's `TBufferFile::ReadObjectAny` and /// `TBufferFile::ReadClass` -pub fn classinfo<'s, E>(i: &'s [u8]) -> nom::IResult<&[u8], ClassInfo, E> +pub fn classinfo<'s, E>(input: Span<'s>) -> RResult<'s, ClassInfo, E> where - E: RootError<&'s [u8]>, + E: RootError>, { let (i, tag) = alt(( be_u32 @@ -256,12 +390,12 @@ pub fn classinfo<'s, E>(i: &'s [u8]) -> nom::IResult<&[u8], ClassInfo, E> .verify(|&v| is_byte_count(&v) && v != Flags::NEW_CLASSTAG.bits()) .context("class info: class tag preceded by byte count") .precedes(be_u32) - )).parse(i)?; + )).parse(input)?; match tag as u32 { 0xFFFF_FFFF => { // new classtag mask - c_string.map(ClassInfo::New).parse(i) + c_string.map(ClassInfo::New).context("new classtag").parse(i) } tag => { if Flags::from_bits_truncate(tag).contains(Flags::CLASS_MASK) { @@ -273,15 +407,38 @@ pub fn classinfo<'s, E>(i: &'s [u8]) -> nom::IResult<&[u8], ClassInfo, E> } } -struct RootContextParser<'s, I, O, E> { - context: &'s Context, - parser: fn(&'s Context, I) -> IResult, -} - -impl<'s, I, O, E> Parser for RootContextParser<'s, I, O, E> { - fn parse(&mut self, input: I) -> IResult { - self.parser(self.context, input) - } +pub fn class_name<'s, E>(ctx: &'s Context) -> impl RParser<'s, &'s str, E> + where + E: RootError> +{ + let parser = move |i| { + let ctx_offset = u32::try_from(ctx.offset) + .expect("Encountered pointer larger than 32 bits. 
Please file a bug."); + + let (i, ci) = classinfo(i)?; + match ci { + ClassInfo::New(name) => Ok((i, name)), + ClassInfo::Exists(tag) => { + let abs_offset = tag & !Flags::CLASS_MASK.bits(); + // TODO handle insufficient buffer length, abs_offset < ctx_offset + let s = ctx.span().slice(((abs_offset - ctx_offset) as usize)..); + let (_, name) = class_name(ctx).context("pre-existing class name").parse(s)?; + Ok((i, name)) + } + ClassInfo::References(tag) => { + let abs_offset = tag; + if abs_offset == 0 { + Ok((i, "")) + } else { + // TODO as above + let s = ctx.span().slice(((abs_offset - ctx_offset) as usize)..); + let (_, name) = class_name(ctx).context("reference to class name").parse(s)?; + Ok((i, name)) + } + } + } + }; + parser.context("class name") } /// Figure out the class we are looking at. This parser immediately @@ -289,57 +446,55 @@ impl<'s, I, O, E> Parser for RootContextParser<'s, I, O, E> { /// this buffer and the associated data. This function needs a /// `Context`, though, which may not be available. If so, have a look /// at the `classinfo` parser. -pub fn class_name_and_buffer<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], (&'s str, &'s [u8]), E> +pub fn class_name_and_buffer<'s, E>(ctx: &'s Context) -> impl RParser<'s, (&'s str, Span<'s>), E> where - E: RootError<&'s [u8]>, + E: RootError>, { - RootContextParser { - context, - parser: (|ctx, i| { - let ctx_offset = u32::try_from(ctx.offset) - .expect("Encountered pointer larger than 32 bits. 
Please file a bug."); - let (i, ci) = classinfo(i)?; - Ok(match ci { - ClassInfo::New(s) => { - let (i, buf) = length_value(checked_byte_count, rest)(i)?; - (i, (s, buf)) - } - ClassInfo::Exists(tag) => { - let name = { - let abs_offset = tag & !Flags::CLASS_MASK.bits(); - // TODO handle insufficient buffer length, abs_offset < ctx_offset - let s = &ctx.s[((abs_offset - ctx_offset) as usize)..]; - let (_, (name, _)) = class_name_and_buffer(ctx).parse(s)?; - name - }; - let (i, buf) = length_value(checked_byte_count, rest)(i)?; - (i, (name, buf)) - } - ClassInfo::References(tag) => { - let (name, buf) = { - let abs_offset = tag; - // Sometimes, the reference points to `0`; so we return an empty slice - if abs_offset == 0 { - ("", &ctx.s[..0]) - } else { - // TODO as above - let s = &ctx.s[((abs_offset - ctx_offset) as usize)..]; - let (_, (name, buf)) = class_name_and_buffer(ctx).parse(s)?; - (name, buf) - } - }; - (i, (name, buf)) - } - }) - }), - } + let parser = move |i| { + let ctx_offset = u32::try_from(ctx.offset) + .expect("Encountered pointer larger than 32 bits. 
Please file a bug."); + let (i, ci) = classinfo(i)?; + Ok(match ci { + ClassInfo::New(s) => { + let (i, buf) = length_value(checked_byte_count, rest).complete().context("length-prefixed data").parse(i)?; + (i, (s, buf)) + } + ClassInfo::Exists(tag) => { + let name = { + let abs_offset = tag & !Flags::CLASS_MASK.bits(); + // TODO handle insufficient buffer length, abs_offset < ctx_offset + let s = ctx.span().slice(((abs_offset - ctx_offset) as usize)..); + let (_, name) = class_name(ctx).context("pre-existing class name").parse(s)?; + name + }; + let (i, buf) = length_value(checked_byte_count, rest).complete().context("length-prefixed data").parse(i)?; + (i, (name, buf)) + } + ClassInfo::References(tag) => { + let (name, buf) = { + let abs_offset = tag; + // Sometimes, the reference points to `0`; so we return an empty slice + if abs_offset == 0 { + ("", ctx.span().slice(..0)) + } else { + // TODO as above + let s = ctx.span().slice(((abs_offset - ctx_offset) as usize)..); + let (_, (name, buf)) = class_name_and_buffer(ctx).context("reference to class").parse(s)?; + (name, buf) + } + }; + (i, (name, buf)) + } + }) + }; + parser.context("class name and buffer") } /// Parse a `Raw` chunk from the given input buffer. This is useful when one does not /// know the exact type at the time of parsing -pub fn raw<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Raw<'s>, E> +pub fn raw<'s, E>(context: &'s Context) -> impl RParser<'s, Raw<'s>, E> where - E: RootError<&'s [u8]>, + E: RootError>, { class_name_and_buffer(context) .map(|(classinfo, obj)| Raw { classinfo, obj }) @@ -348,20 +503,24 @@ pub fn raw<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Raw<'s>, E> /// Same as `raw` but doesn't require a `Context` as input. Panics if /// a `Context` is required to parse the underlying buffer (i.e., the /// given buffer contains a reference to some other part of the file. 
-pub fn raw_no_context<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], (ClassInfo, &[u8]), E> +pub fn raw_no_context<'s, E>(input: Span<'s>) -> RResult<'s, (ClassInfo, Span<'s>), E> where - E: RootError<&'s [u8]>, + E: RootError>, { use super::ClassInfo::*; - let (input, ci) = classinfo.parse(input)?; - - match ci { - // point to beginning of slice - References(0) => take(0usize).map(|o| (ci, o)).parse(input), - New(_) | Exists(_) => length_data(checked_byte_count).map(|o| (ci, o)).parse(input), - // If its a reference to any other thing but 0 it needs a context - _ => panic!("Object needs context!"), - } + let parser = |input| { + let (input, ci) = classinfo.parse(input)?; + + match ci { + // point to beginning of slice + References(0) => take(0usize).map(|o| (ci, o)).parse(input), + New(_) | Exists(_) => length_data(checked_byte_count).complete().context("length-prefixed data").map(|o| (ci, o)).parse(input), + // If its a reference to any other thing but 0 it needs a context + _ => panic!("Object needs context!"), + } + }; + + parser.context("raw (no context)").parse(input) } /// ESD trigger classes are strings describing a particular @@ -369,12 +528,12 @@ pub fn raw_no_context<'s, E>(input: &'s [u8]) -> nom::IResult<&'s [u8], (ClassIn /// different "menu" of available triggers. The trigger menu is saved /// as an `TObjArray` of `TNamed` objects for each event. 
This breaks /// it down to a simple vector -pub fn parse_tobjarray_of_tnameds<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], Vec, E> +pub fn parse_tobjarray_of_tnameds<'s, E>(input: Span<'s>) -> RResult<'s, Vec, E> where - E: RootError<&'s [u8]>, + E: RootError>, { // each element of the tobjarray has a Vec - let (input, vals) = length_value(checked_byte_count, tobjarray_no_context)(input)?; + let (input, vals) = length_value(checked_byte_count, tobjarray_no_context).complete().context("length-prefixed array").parse(input)?; let strings = vals .into_iter() .map(|(ci, el)| { @@ -392,9 +551,9 @@ pub fn parse_tobjarray_of_tnameds<'s, E>(input: &'s [u8]) -> nom::IResult<&[u8], /// number of bytes can be found in the comment string of the /// generated YAML code (for ALICE ESD files at least). This function /// reconstructs a float from the exponent and mantissa -pub fn parse_custom_mantissa<'s, E>(input: &'s [u8], nbits: usize) -> nom::IResult<&[u8], f32, E> +pub fn parse_custom_mantissa<'s, E>(input: Span<'s>, nbits: usize) -> RResult<'s, f32, E> where - E: RootError<&'s [u8]>, + E: RootError>, { // TODO: Use ByteOrder crate to be cross-platform? 
pair(be_u8, be_u16).map(|(exp, man)| { @@ -410,12 +569,16 @@ pub fn parse_custom_mantissa<'s, E>(input: &'s [u8], nbits: usize) -> nom::IResu mod classinfo_test { use nom::error::VerboseError; + use crate::core::Span; + use super::classinfo; /// There is an issue where the following is parsed differently on /// nightly ( rustc 1.25.0-nightly (79a521bb9 2018-01-15)), than /// on stable, if verbose-errors are enabled for nom in the /// cargo.toml + /// + /// Passes again as of rustc nightly 1.60.0 (2022-01-12) #[test] fn classinfo_not_complete_read() { let i = vec![ @@ -435,7 +598,7 @@ mod classinfo_test { 105, 116, 108, 101, 0, 0, 0, 65, 0, 0, 0, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 84, 83, 116, 114, 105, 110, 103, ]; - let i = i.as_slice(); + let i = Span::new(i.as_slice()); let (i, _ci) = classinfo::>(i).unwrap(); // The error manifests in the entire input being (wrongly) // consumed, instead of having some left overs diff --git a/root-io/src/core/tkey.rs b/root-io/src/core/tkey.rs index 49d4c5a..6edffb4 100644 --- a/root-io/src/core/tkey.rs +++ b/root-io/src/core/tkey.rs @@ -35,9 +35,9 @@ pub struct TKey { /// Header of a TKey /// Usually, TKeys are followed up by their content, but there is one "index" in every /// root file where only the TKey headers are stored for faster later `Seek`ing -pub fn tkey_header<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], TKeyHeader, E> +pub fn tkey_header<'s, E>(input: Span<'s>) -> RResult<'s, TKeyHeader, E> where - E: RootError<&'s [u8]> + E: RootError> { tuple(( be_u32.context("total size"), @@ -68,9 +68,9 @@ pub fn tkey_header<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], TKeyHeader, E> } /// Parse a file-pointer based on the version of the file -fn seek_point<'s, E>(version: u16) -> impl Parser<&'s [u8], u64, E> +fn seek_point<'s, E>(version: u16) -> impl RParser<'s, u64, E> where - E: RootError<&'s [u8]> + E: RootError> { move |i| { if version > 1000 { @@ -82,9 +82,9 @@ fn 
seek_point<'s, E>(version: u16) -> impl Parser<&'s [u8], u64, E> } /// Parse a full TKey including its payload -pub fn tkey<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], TKey, E> +pub fn tkey<'s, E>(input: Span<'s>) -> RResult<'s, TKey, E> where - E: RootError<&'s [u8]> + E: RootError> { let (i, hdr) = tkey_header.parse(input)?; let buflen = hdr.total_size - hdr.key_len as u32; @@ -92,9 +92,9 @@ pub fn tkey<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], TKey, E> let mut opthdr = Some(hdr); - take(buflen).map_res::<_, _, DecompressionError>(|buf: &[u8]| { + take(buflen).map_res::<_, _, DecompressionError>(|buf: Span| { let obj = if uncomp_len as usize > buf.len() { - decompress(buf)? + decompress(&buf)? } else { buf.to_vec() }; @@ -103,9 +103,9 @@ pub fn tkey<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], TKey, E> } /// Special thing for the keylist in the file header -pub(crate) fn tkey_headers<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], Vec, E> +pub(crate) fn tkey_headers<'s, E>(input: Span<'s>) -> RResult<'s, Vec, E> where - E: RootError<&'s [u8]> + E: RootError> { - length_count(be_u32, tkey_header).parse(input) + length_count(be_u32, tkey_header).complete().context("count-prefixed data").parse(input) } diff --git a/root-io/src/core/tstreamer.rs b/root-io/src/core/tstreamer.rs index ad3de3b..b3bac29 100644 --- a/root-io/src/core/tstreamer.rs +++ b/root-io/src/core/tstreamer.rs @@ -1,6 +1,5 @@ -use nom::{error::VerboseError, IResult, multi::length_value, Parser}; -use nom::combinator::eof; -use nom::multi::{count, length_count}; +use nom::Parser; +use nom::multi::{count, length_count, length_value}; use nom::number::complete::{be_i32, be_u16, be_u32}; use nom::sequence::{pair, tuple}; use nom_supreme::ParserExt; @@ -98,144 +97,143 @@ pub(crate) struct TStreamerElement { /// Parse a `TStreamer` from a `Raw` buffer. 
This is usually the case /// after reading the `TList` of `TStreamerInfo`s from a ROOT file -pub(crate) fn tstreamer<'s, E>(raw: Raw<'s>) -> IResult, TStreamer, E> +pub(crate) fn tstreamer<'s, E>(ctx: &'s Context) -> impl RParser<'s, TStreamer, E> + Copy where - E: RootError<&'s [u8]>, + E: RootError>, { - let wrapped_tstreamerelem = length_value(checked_byte_count, tstreamerelement); - - let result = match raw.classinfo { - "TStreamerBase" => tuple(( - be_u16.context("version"), - wrapped_tstreamerelem, - be_i32.context("version base") - )).map(|(_ver, el, version_base)| TStreamer::Base { el, version_base }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerBasicType" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::BasicType { el }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerBasicPointer" => tuple(( - be_u16.context("version"), - wrapped_tstreamerelem, - be_i32.context("cvers"), - string.context("cname"), - string.context("ccls") - )).map(|(_ver, el, cvers, cname, ccls)| TStreamer::BasicPointer { el, cvers, cname: cname.to_string(), ccls: ccls.to_string() }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerLoop" => tuple(( - be_u16.context("version"), - wrapped_tstreamerelem, - be_i32.context("cvers"), - string.context("cname"), - string.context("ccls") - )).map(|(_ver, el, cvers, cname, ccls)| TStreamer::Loop { el, cvers, cname: cname.to_string(), ccls: ccls.to_string() }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerObject" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::Object { el }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerObjectPointer" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::ObjectPointer { el }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerObjectAny" => pair( - 
be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::ObjectAny { el }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerObjectAnyPointer" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::ObjectAnyPointer { el }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerString" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::String { el }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerSTL" => tuple(( - be_u16.context("version"), - wrapped_tstreamerelem, - be_i32.map(StlTypeID::new).context("vtype"), - be_i32.map_res(TypeId::new).context("ctype") - )).map(|(_ver, el, vtype, ctype)| TStreamer::Stl { el, vtype, ctype }) - .terminated(eof).context("tstreamer").parse(raw.obj), - - "TStreamerSTLstring" => { - // Two version bcs `stlstring` derives from `stl` - be_u16.precedes(length_value(checked_byte_count, tuple(( + let parser = move |i| { + let (i, (classinfo, obj)) = class_name_and_buffer(ctx).parse(i)?; + + + let wrapped_tstreamerelem = length_value(checked_byte_count, tstreamerelement); + + let (_, streamer) = match classinfo { + "TStreamerBase" => tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.context("version base") + )).map(|(_ver, el, version_base)| TStreamer::Base { el, version_base }) + .all_consuming().context("tstreamer (base)").parse(obj), + + "TStreamerBasicType" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::BasicType { el }) + .all_consuming().context("tstreamer (basic type)").parse(obj), + + "TStreamerBasicPointer" => tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.context("cvers"), + string.context("cname"), + string.context("ccls") + )).map(|(_ver, el, cvers, cname, ccls)| TStreamer::BasicPointer { el, cvers, cname: cname.to_string(), ccls: ccls.to_string() }) + 
.all_consuming().context("tstreamer (basic pointer)").parse(obj), + + "TStreamerLoop" => tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.context("cvers"), + string.context("cname"), + string.context("ccls") + )).map(|(_ver, el, cvers, cname, ccls)| TStreamer::Loop { el, cvers, cname: cname.to_string(), ccls: ccls.to_string() }) + .all_consuming().context("tstreamer (loop)").parse(obj), + + "TStreamerObject" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::Object { el }) + .all_consuming().context("tstreamer (object)").parse(obj), + + "TStreamerObjectPointer" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::ObjectPointer { el }) + .all_consuming().context("tstreamer (object pointer)").parse(obj), + + "TStreamerObjectAny" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::ObjectAny { el }) + .all_consuming().context("tstreamer (object (any))").parse(obj), + + "TStreamerObjectAnyPointer" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::ObjectAnyPointer { el }) + .all_consuming().context("tstreamer (object pointer (any))").parse(obj), + + "TStreamerString" => pair( + be_u16.context("version"), + wrapped_tstreamerelem, + ).map(|(_ver, el)| TStreamer::String { el }) + .all_consuming().context("tstreamer (string)").parse(obj), + + "TStreamerSTL" => tuple(( be_u16.context("version"), wrapped_tstreamerelem, be_i32.map(StlTypeID::new).context("vtype"), be_i32.map_res(TypeId::new).context("ctype") - )))).map(|(_ver, el, vtype, ctype)| TStreamer::StlString { el, vtype, ctype }) - .terminated(eof).context("tstreamer").parse(raw.obj) - } - ci => unimplemented!("Unknown TStreamer {}", ci), - }; + )).map(|(_ver, el, vtype, ctype)| TStreamer::Stl { el, vtype, ctype }) + .all_consuming().context("tstreamer (stl)").parse(obj), + + "TStreamerSTLstring" => { + // Two version bcs 
`stlstring` derives from `stl` + be_u16.precedes(length_value(checked_byte_count, tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.map(StlTypeID::new).context("vtype"), + be_i32.map_res(TypeId::new).context("ctype") + )))).map(|(_ver, el, vtype, ctype)| TStreamer::StlString { el, vtype, ctype }) + .all_consuming().context("tstreamer (stl string)").parse(obj) + } + ci => unimplemented!("Unknown TStreamer {}", ci), + }?; - result.map(|(i, res)| (Raw { classinfo: raw.classinfo, obj: i }, res)) -} + Ok((i, streamer)) + }; -/* -struct Streamers<'s> { - ctx: &'s Context + parser.context("tstreamer") } -impl<'s> Parser<&'s [u8], Vec, E> for Streamers<'s> -where - E: ParseError<'s [u8]> + Debug - - */ /// Return all `TSreamerInfo` for the data in this file -pub fn streamers<'s, E>(ctx: &'s Context) -> impl Parser<&'s [u8], Vec, E> + 's +pub fn streamers<'s, E>(ctx: &'s Context) -> impl RParser<'s, Vec, E> + 's where - E: RootError<&'s [u8]>, + E: RootError>, { - - move |i| { + let parser = move |i| { // Dunno why we are 4 bytes off with the size of the streamer info... // This TList in the payload has a bytecount in front... 
let (i, tlist_objs) = length_value(checked_byte_count, tlist(ctx))(i)?; // Mainly this is a TList of `TStreamerInfo`s, but there might // be some "rules" in the end - let streamers = tlist_objs + let _streamers: Result, _> = tlist_objs .iter() .filter_map(|raw| match raw.classinfo { "TStreamerInfo" => Some(raw.obj), _ => None, }) - .map(|buf| tstreamerinfo::>(ctx).parse(buf).unwrap().1) + .map(|buf| Ok(tstreamerinfo(ctx).context("in streamers listing").parse(buf)?.1)) .collect(); + let streamers = _streamers?; + // Parse the "rules", if any, from the same tlist - let _rules: Vec<_> = tlist_objs + let _rules: Result, _> = tlist_objs .iter() .filter_map(|raw| match raw.classinfo { "TList" => Some(raw.obj), _ => None, }) .map(|buf| { - let tl = tlist::>(ctx).parse(buf).unwrap().1; + let tl = tlist(ctx).parse(buf)?.1; // Each `Rule` is a TList of `TObjString`s tl.iter() - .map(|el| tobjstring::<'s, E>(el.obj).unwrap().1) - .collect::>() + .map(|el| tobjstring::<'s, E>(el.obj)) + .collect::, _>>() }) .collect(); + let _rules = _rules?; for raw in tlist_objs { match raw.classinfo { @@ -245,13 +243,15 @@ pub fn streamers<'s, E>(ctx: &'s Context) -> impl Parser<&'s [u8], Vec(i: &'s [u8]) -> IResult<&'s [u8], TStreamerElement, E> +fn tstreamerelement<'s, E>(input: Span<'s>) -> RResult<'s, TStreamerElement, E> where - E: RootError<&'s [u8]>, + E: RootError>, { tuple(( @@ -266,8 +266,7 @@ fn tstreamerelement<'s, E>(i: &'s [u8]) -> IResult<&'s [u8], TStreamerElement, E tuple(( move |i| if ver == 1 { length_count(be_u32, be_u32)(i) } else { count(be_u32, 5)(i) }, string, - eof - )).map_res(move |(max_idx, type_name, _)| { + )).map_res(move |(max_idx, type_name)| { if ver <= 3 { Err(VersionNotSupported(Component::TStreamerElement, ver as u32, "must be >= 4")) } else { @@ -283,7 +282,7 @@ fn tstreamerelement<'s, E>(i: &'s [u8]) -> IResult<&'s [u8], TStreamerElement, E }) } }) - })).context("tstreamer element").parse(i) + })).context("tstreamer element").parse(input) } impl 
TStreamer { diff --git a/root-io/src/core/tstreamerinfo.rs b/root-io/src/core/tstreamerinfo.rs index 4a43245..c271895 100644 --- a/root-io/src/core/tstreamerinfo.rs +++ b/root-io/src/core/tstreamerinfo.rs @@ -1,6 +1,5 @@ -use nom::{multi::length_value, number::complete::*, Parser}; -use nom::combinator::eof; -use nom::error::dbg_dmp; +use nom::{multi::length_value, number::complete::*}; +use nom_supreme::ParserExt; use quote::*; use std::fmt::Debug; @@ -22,12 +21,12 @@ pub struct TStreamerInfo { } /// Parse one `TStreamerInfo` object (as found in the `TList`) -pub(crate) fn tstreamerinfo<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], TStreamerInfo, E> +pub(crate) fn tstreamerinfo<'s, E>(context: &'s Context) -> impl RParser<'s, TStreamerInfo, E> where - E: RootError<&'s [u8]>, + E: RootError>, { - move |i| { - let parse_members = tobjarray(|_| tstreamer, context); + let parser = move |i| { + let parse_members = tobjarray(tstreamer(context)).context("tstreamerinfo members"); let (i, tstreamerinfo_ver) = be_u16(i)?; let (i, named) = length_value(checked_byte_count, tnamed)(i)?; @@ -35,11 +34,7 @@ pub(crate) fn tstreamerinfo<'s, E>(context: &'s Context) -> impl Parser<&'s [u8] let (i, new_class_version) = be_u32(i)?; let (i, _size_tobjarray_with_class_info) = checked_byte_count(i)?; let (i, _class_info_objarray) = classinfo(i)?; - let (i, data_members) = length_value( - dbg_dmp(checked_byte_count, "byte count"), - dbg_dmp(parse_members, "parse_members"), - )(i)?; - let (i, _eof) = eof(i)?; + let (i, data_members) = length_value(checked_byte_count, parse_members)(i)?; Ok(( i, TStreamerInfo { @@ -50,7 +45,10 @@ pub(crate) fn tstreamerinfo<'s, E>(context: &'s Context) -> impl Parser<&'s [u8] data_members, }, )) - } + }; + + // TODO move all_consuming to call site? 
+ parser.all_consuming().context("tstreamerinfo") } impl ToRustParser for TStreamerInfo { diff --git a/root-io/src/core/types.rs b/root-io/src/core/types.rs index 0d7444e..189440a 100644 --- a/root-io/src/core/types.rs +++ b/root-io/src/core/types.rs @@ -1,8 +1,9 @@ use nom::HexDisplay; +use nom_locate::LocatedSpan; use std::fmt; -use crate::core::Source; +use crate::core::{Source, Span}; /// Absolute point in file to seek data pub(crate) type SeekPointer = u64; @@ -55,9 +56,10 @@ pub struct TNamed { } /// A type holding nothing but the original data and a class info object +#[derive(Clone, Copy)] pub struct Raw<'s> { pub(crate) classinfo: &'s str, - pub(crate) obj: &'s [u8], + pub(crate) obj: Span<'s>, } /// The context from which we are currently parsing @@ -73,6 +75,12 @@ pub struct Context { pub(crate) s: Vec, } +impl Context { + pub fn span(&self) -> Span { + LocatedSpan::new(&self.s) + } +} + impl<'s> fmt::Debug for Raw<'s> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{} \n {}", self.classinfo, self.obj.to_hex(16)) diff --git a/root-io/src/lib.rs b/root-io/src/lib.rs index 3abd308..b69be4d 100644 --- a/root-io/src/lib.rs +++ b/root-io/src/lib.rs @@ -23,6 +23,7 @@ extern crate bitflags; extern crate flate2; extern crate lzma_rs; extern crate nom; +extern crate nom_locate; #[macro_use] extern crate quote; extern crate reqwest; diff --git a/root-io/src/tests/basic_io.rs b/root-io/src/tests/basic_io.rs index 43a81b3..592ae71 100644 --- a/root-io/src/tests/basic_io.rs +++ b/root-io/src/tests/basic_io.rs @@ -1,7 +1,6 @@ #![cfg(not(target_arch = "wasm32"))] use nom::Parser; -use nom::error::VerboseError; use std::path::PathBuf; @@ -34,15 +33,21 @@ fn list_of_rules() { let context = Context { source: PathBuf::from("").into(), offset: 0, - s: vec![], + s: s.to_vec(), }; - use nom::HexDisplay; - println!("{}", s.to_hex(16)); - let (_, (_name, obj)) = class_name_and_buffer::>(&context).parse(s).unwrap(); - println!("{}", obj.to_hex(16)); - let 
(obj, _ci) = classinfo::>(obj).unwrap(); - println!("{:?}", _ci); - println!("{}", obj.to_hex(16)); + + + let mut parser = wrap_parser_ctx(|ctx| move |i| { + let (leftover, (name, obj)) = class_name_and_buffer(ctx).parse(i)?; + let (_, l) = tlist(ctx).parse(obj)?; + Ok((leftover, (name, l))) + }); + + let (name, l) = match parser(&context) { + Ok((name, l)) => (name, l), + Err(e) => { println!("{}", e); assert!(false); unreachable!() } + }; + println!("name = {}\nlist = {:?}", name, l); // let (_obj, l) = tlist(obj, &context).unwrap(); // assert_eq!(l.name, "listOfRules"); // assert_eq!(l.len, 2); diff --git a/root-io/src/tree_reader/branch.rs b/root-io/src/tree_reader/branch.rs index 3580725..6ea346b 100644 --- a/root-io/src/tree_reader/branch.rs +++ b/root-io/src/tree_reader/branch.rs @@ -1,6 +1,5 @@ use futures::prelude::*; use nom::{error::VerboseError, IResult, multi::{count, length_data, length_value}, number::complete::*, Parser}; -use nom::combinator::eof; use nom_supreme::ParserExt; use std::fmt::Debug; @@ -116,13 +115,13 @@ impl TBranch { /// ``` pub fn as_fixed_size_iterator(&self, p: P) -> impl Stream where - P: Fn(&[u8]) -> IResult<&[u8], T, VerboseError<&[u8]>>, + P: Fn(Span) -> IResult>, { stream::iter(self.containers().to_owned()) .then(|basket| async move { basket.raw_data().await.unwrap() }) .map(move |(n_events_in_basket, buffer)| { // Parse the entire basket buffer; if something is left over its just junk - let x = count(&p, n_events_in_basket as usize)(&buffer); + let x = count(&p, n_events_in_basket as usize)(Span::new(&buffer)); let events = match x { Ok((_rest, output)) => output, Err(e) => panic!("Parser failed unexpectedly {:?}", e), @@ -142,13 +141,13 @@ impl TBranch { el_counter: Vec, ) -> impl Stream> where - P: Fn(&[u8]) -> IResult<&[u8], T, VerboseError<&[u8]>>, + P: Fn(Span) -> IResult>, { let mut elems_per_event = el_counter.into_iter(); stream::iter(self.containers().to_owned()) .then(|basket| async move { 
basket.raw_data().await.unwrap() }) .map(move |(n_events_in_basket, buffer)| { - let mut buffer = buffer.as_slice(); + let mut buffer = Span::new(&buffer); let mut events = Vec::with_capacity(n_events_in_basket as usize); for _ in 0..n_events_in_basket { if let Some(n_elems_in_event) = elems_per_event.next() { @@ -169,34 +168,42 @@ impl TBranch { /// `TBranchElements` are a subclass of `TBranch` if the content is an Object /// We ignore the extra information for now and just parse the TBranch"Header" in either case -pub(crate) fn tbranch_hdr<'s, E>(ctxt: &'s Context) -> impl Parser, TBranch, E> +pub(crate) fn tbranch_hdr<'s, E>(ctxt: &'s Context) -> impl RParser<'s, TBranch, E> + Copy where - E: RootError<&'s [u8]>, + E: RootError>, { - move |raw: Raw<'s>| { - match raw.classinfo { + let parser = move |i| { + let (i, (classinfo, obj)) = class_name_and_buffer(ctxt).parse(i)?; + + let (_, branch) = match classinfo { "TBranchElement" | "TBranchObject" => { - be_u16.precedes(length_value(checked_byte_count, tbranch(ctxt))) - .terminated(eof) - .parse(raw.obj) + be_u16.precedes(length_value(checked_byte_count, tbranch(ctxt)).complete().context("length-prefixed data")) + .all_consuming() + .context("tbranch object") + .parse(obj) } "TBranch" => tbranch(ctxt) - .terminated(eof) - .parse(raw.obj), + .all_consuming() + .context("tbranch wrapper") + .parse(obj), name => panic!("Unexpected Branch type {}", name), - }.map(|(i, res)| (Raw { classinfo: raw.classinfo, obj: i }, res)) - } + }?; + + Ok((i, branch)) + }; + + parser.context("tbranch hdr") } -fn tbranch<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], TBranch, E> +fn tbranch<'s, E>(context: &'s Context) -> impl RParser<'s, TBranch, E> where - E: RootError<&'s [u8]>, + E: RootError>, { - move |inpt| { - let (i, _ver) = be_u16.verify(|v| [11, 12].contains(v)).parse(inpt)?; - let (i, tnamed) = length_value(checked_byte_count, tnamed).parse(i)?; - let (i, _tattfill) = length_data(checked_byte_count).parse(i)?; + let 
parser = move |inpt| { + let (i, _ver) = be_u16.verify(|v| [11, 12].contains(v)).context("assertion: branch version must be 11 or 12").parse(inpt)?; + let (i, tnamed) = length_value(checked_byte_count, tnamed).complete().context("tnamed").parse(i)?; + let (i, _tattfill) = length_data(checked_byte_count).context("tattrfill").parse(i)?; let (i, fcompress) = be_i32(i)?; let (i, fbasketsize) = be_i32(i)?; let (i, fentryoffsetlen) = be_i32(i)?; @@ -210,16 +217,25 @@ fn tbranch<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], TBranch, E> let (i, ftotbytes) = be_i64(i)?; let (i, fzipbytes) = be_i64(i)?; let (i, fbranches) = - length_value(checked_byte_count, tobjarray(tbranch_hdr, context))(i)?; + length_value(checked_byte_count, tobjarray(tbranch_hdr(context))).complete().context("fbranches").parse(i)?; let (i, fleaves) = - length_value(checked_byte_count, tobjarray(TLeaf::parse_from_raw, context))(i)?; + length_value(checked_byte_count, tobjarray(TLeaf::parse(context))).complete().context("fleaves").parse(i)?; let (i, fbaskets) = length_value(checked_byte_count, - tobjarray(|_| |r: Raw<'s>| Ok((Raw { classinfo: r.classinfo, obj: &[] }, r.obj)), context))(i)?; - let (i, fbasketbytes) = be_u8.precedes(count(be_i32, fmaxbaskets as usize)).parse(i)?; - let (i, fbasketentry) = be_u8.precedes(count(be_i64, fmaxbaskets as usize)).parse(i)?; - let (i, fbasketseek) = be_u8.precedes(count(be_u64, fmaxbaskets as usize)).parse(i)?; - let (i, ffilename) = string(i)?; + tobjarray(|i| class_name_and_buffer(context).map(|(_, buf)| buf).parse(i))) + .complete() + .context("fbaskets") + .parse(i)?; + let (i, fbasketbytes) = be_u8.precedes(count(be_i32, fmaxbaskets as usize)) + .context("fbasketbytes") + .parse(i)?; + let (i, fbasketentry) = be_u8.precedes(count(be_i64, fmaxbaskets as usize)) + .context("fbasketentry") + .parse(i)?; + let (i, fbasketseek) = be_u8.precedes(count(be_u64, fmaxbaskets as usize)) + .context("fbasketseek") + .parse(i)?; + let (i, ffilename) = 
string.context("ffilename").parse(i)?; let name = tnamed.name; let fbaskets = fbaskets @@ -263,5 +279,7 @@ fn tbranch<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], TBranch, E> containers, }, )) - } + }; + + parser.context("tbranch") } diff --git a/root-io/src/tree_reader/container.rs b/root-io/src/tree_reader/container.rs index 04373f6..cefbd2c 100644 --- a/root-io/src/tree_reader/container.rs +++ b/root-io/src/tree_reader/container.rs @@ -1,12 +1,11 @@ use nom::*; use nom::combinator::rest; -use nom::number::complete::{be_i8, be_u16}; -use nom::number::streaming::be_u32; +use nom::number::complete::{be_i8, be_u16, be_u32}; use nom::sequence::tuple; use nom_supreme::ParserExt; use crate::core::*; -use crate::tree_reader::ReadError; +use crate::core::ReadError; #[derive(Debug, Clone)] pub(crate) enum Container { @@ -40,9 +39,9 @@ impl Container { /// Return a tuple indicating the number of elements in this basket /// and the content as a Vec -fn tbasket2vec<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], (u32, Vec), E> +fn tbasket2vec<'s, E>(input: Span<'s>) -> RResult<'s, (u32, Vec), E> where - E : RootError<&'s [u8]> + E : RootError> { tuple(( tkey_header.context("header"), @@ -55,7 +54,7 @@ fn tbasket2vec<'s, E>(input: &'s [u8]) -> IResult<&'s [u8], (u32, Vec), E> rest.context("buffer") )).map_res::<_, _, DecompressionError>(|(hdr, _, _, _, n_entry_buf, last, _, buf)| { let buf = if hdr.uncomp_len as usize > buf.len() { - decompress(buf)? + decompress(&buf)? 
} else { buf.to_vec() }; @@ -76,7 +75,7 @@ mod tests { use crate::core::tkey_header; use crate::core::wrap_parser; - use crate::tree_reader::ReadError; + use crate::core::ReadError; use super::tbasket2vec; diff --git a/root-io/src/tree_reader/leafs.rs b/root-io/src/tree_reader/leafs.rs index 9718c54..adfd80a 100644 --- a/root-io/src/tree_reader/leafs.rs +++ b/root-io/src/tree_reader/leafs.rs @@ -1,4 +1,4 @@ -use nom::{combinator::verify, error::ParseError, IResult, multi::length_value, number::complete::*, Parser}; +use nom::{combinator::verify, multi::length_value, number::complete::*, Parser}; use nom::branch::alt; use nom::sequence::tuple; use nom_supreme::ParserExt; @@ -9,8 +9,10 @@ use std::fmt::Debug; use crate::{code_gen::rust::ToRustType, core::*}; /// Parse a bool from a big endian u8 -fn be_bool<'a, E: ParseError<&'a [u8]>>(i: &'a [u8]) -> IResult<&[u8], bool, E> { - let (i, byte) = verify(be_u8, |&byte| byte == 0 || byte == 1)(i)?; +fn be_bool<'s, E: RootError>>(i: Span<'s>) -> RResult<'s, bool, E> { + let (i, byte) = be_u8 + .verify(|&byte| byte == 0 || byte == 1) + .parse(i)?; Ok((i, byte == 1)) } @@ -21,26 +23,20 @@ pub struct TLeaf { } impl TLeaf { - pub fn parse<'s, E>( - raw: Raw<'s>, - context: &'s Context, - ) -> IResult, Self, E> - where - E: RootError<&'s [u8]>, - { - TLeafVariant::parse(raw, context).map(|(i, var)| (i, Self { variant: var })) - } - // A helper function to get around some lifetime issues on the caller sider - pub(crate) fn parse_from_raw<'s, E>( + pub(crate) fn parse<'s, E>( ctxt: &'s Context, - ) -> impl Parser, Self, E> + ) -> impl RParser<'s, Self, E> + Copy where - E: RootError<&'s [u8]>, + E: RootError>, { - move |raw: Raw<'s>| { - Self::parse(raw, ctxt) - } + let parser = move |i| { + let (i, (classinfo, obj)) = class_name_and_buffer(ctxt).parse(i)?; + let (_, variant) = TLeafVariant::parse(ctxt, classinfo, obj)?; + Ok((i, Self { variant })) + }; + + parser.context("tleaf") } } @@ -59,25 +55,21 @@ enum TLeafVariant { } 
impl TLeafVariant { - fn parse<'s, E>(raw: Raw<'s>, context: &'s Context) -> IResult, Self, E> + fn parse<'s, E>(context: &'s Context, classinfo: &'s str, i: Span<'s>) -> RResult<'s, Self, E> where - E: RootError<&'s [u8]> + Debug, + E: RootError>, { - let i = raw.obj; - let mk_raw = |i| Raw { obj: i, classinfo: raw.classinfo }; - match raw.classinfo { - "TLeafB" => TLeafB::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafB(l))), - "TLeafS" => TLeafS::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafS(l))), - "TLeafI" => TLeafI::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafI(l))), - "TLeafL" => TLeafL::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafL(l))), - "TLeafF" => TLeafF::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafF(l))), - "TLeafD" => TLeafD::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafD(l))), - "TLeafC" => TLeafC::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafC(l))), - "TLeafO" => TLeafO::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafO(l))), - "TLeafD32" => TLeafD32::parse(i, context).map(|(i, l)| (mk_raw(i), TLeafVariant::TLeafD32(l))), - "TLeafElement" => { - TLeafElement::parse(context).map(TLeafVariant::TLeafElement).parse(i).map(|(i, l)| (mk_raw(i), l)) - } + match classinfo { + "TLeafB" => TLeafB::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafB(l))), + "TLeafS" => TLeafS::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafS(l))), + "TLeafI" => TLeafI::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafI(l))), + "TLeafL" => TLeafL::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafL(l))), + "TLeafF" => TLeafF::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafF(l))), + "TLeafD" => TLeafD::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafD(l))), + "TLeafC" => TLeafC::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafC(l))), + "TLeafO" => TLeafO::parse(i, context).map(|(i, l)| (i, 
TLeafVariant::TLeafO(l))), + "TLeafD32" => TLeafD32::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafD32(l))), + "TLeafElement" => TLeafElement::parse(context).map(TLeafVariant::TLeafElement).parse(i), name => unimplemented!("Unexpected Leaf type {}", name), } } @@ -97,12 +89,12 @@ macro_rules! make_tleaf_variant { fmaximum: $field_type, } impl $struct_name { - fn parse<'s, E>(i: &'s [u8], context: &'s Context) -> IResult<&'s [u8], Self, E> + fn parse<'s, E>(input: Span<'s>, context: &'s Context) -> RResult<'s, Self, E> where - E: RootError<&'s [u8]>, + E: RootError>, { // All known descendens have version 1 - let (i, _) = verify(be_u16, |&ver| ver == 1)(i)?; + let (i, _) = verify(be_u16, |&ver| ver == 1)(input)?; let (i, base) = length_value(checked_byte_count, TLeafBase::parse(context))(i)?; let (i, fminimum) = $parser(i)?; @@ -150,9 +142,9 @@ struct TLeafElement { } impl TLeafElement { - fn parse<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Self, E> + fn parse<'s, E>(context: &'s Context) -> impl RParser<'s, Self, E> where - E: RootError<&'s [u8]>, + E: RootError>, { be_u16.verify(|&ver| ver == 1).precedes( tuple(( @@ -186,9 +178,9 @@ struct TLeafBase { } impl TLeafBase { - fn parse<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Self, E> + fn parse<'s, E>(context: &'s Context) -> impl RParser<'s, Self, E> where - E: RootError<&'s [u8]>, + E: RootError>, { move |i| { let (i, ver) = be_u16(i)?; @@ -198,12 +190,11 @@ impl TLeafBase { let (i, foffset) = be_i32(i)?; let (i, fisrange) = be_bool(i)?; let (i, fisunsigned) = be_bool(i)?; - let (i, fleafcount): (&'s [u8], Option>) = { + let (i, fleafcount) = { alt(( be_u32.verify(|&v| v == 0).map(|_| None), - raw(context) - .and_then(|r: Raw<'s>| TLeafVariant::parse(r, context)) - .map(|leaf| Some(Box::new(leaf))) + TLeaf::parse(context) + .map(|TLeaf { variant }| Some(Box::new(variant))) )).parse(i)? 
}; Ok(( diff --git a/root-io/src/tree_reader/mod.rs b/root-io/src/tree_reader/mod.rs index fbcbbbc..d8a6568 100644 --- a/root-io/src/tree_reader/mod.rs +++ b/root-io/src/tree_reader/mod.rs @@ -4,12 +4,6 @@ //! several elements per collision. This module provides two Iterator //! structs in order to iterate over these columns (`TBranches` in //! ROOT lingo). -use nom::error::VerboseError; -use thiserror::Error; - -use crate::core::DecompressionError; -use crate::tree_reader::ReadError::ParseError; - pub use self::tree::{Tree, ttree}; mod branch; @@ -17,32 +11,6 @@ mod container; mod leafs; pub mod tree; -#[derive(Error, Debug)] -pub enum ReadError { - #[error("Error reading data")] - IoError(#[from] std::io::Error), - #[error("Error fetching data from online source")] - ReqwestError(#[from] reqwest::Error), - #[error("Error decompressing data")] - DecompressionError(#[from] DecompressionError), - #[error("Error parsing data")] - ParseError(VerboseError>), -} - -impl From>> for ReadError { - fn from(e: VerboseError>) -> ReadError { - ParseError(e) - } -} - -#[derive(Error, Debug)] -pub enum WriteError { - #[error(transparent)] - ReadError(#[from] ReadError), - #[error(transparent)] - FmtError(#[from] std::fmt::Error) -} - #[cfg(all(test, not(target_arch = "wasm32")))] mod tests { use tokio; @@ -50,13 +18,15 @@ mod tests { use std::path::PathBuf; use crate::core::RootFile; + use crate::core::UnwrapPrint; + #[tokio::test] async fn simple_tree() { let path = PathBuf::from("./src/test_data/simple.root"); let f = RootFile::new(path.as_path()) .await - .expect("Failed to open file"); - f.items()[0].as_tree().await.unwrap(); + .unwrap_print(); + f.items()[0].as_tree().await.unwrap_print(); } } diff --git a/root-io/src/tree_reader/tree.rs b/root-io/src/tree_reader/tree.rs index 833412c..25b5b19 100644 --- a/root-io/src/tree_reader/tree.rs +++ b/root-io/src/tree_reader/tree.rs @@ -1,5 +1,5 @@ use nom::branch::alt; -use nom::combinator::{cond, verify}; +use 
nom::combinator::cond; use nom::multi::{count, length_value}; use nom::multi::length_data; use nom::number::complete::{be_f64, be_i32, be_i64, be_u16, be_u32, be_u8}; @@ -126,22 +126,26 @@ impl<'s> Tree { /// Parse a `Tree` from the given buffer. Usually used through `FileItem::parse_with`. #[allow(clippy::unnecessary_unwrap)] -pub fn ttree<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Tree, E> +pub fn ttree<'s, E>(context: &'s Context) -> impl RParser<'s, Tree, E> where - E: RootError<&'s [u8]>, + E: RootError>, { - move |i| { - let none_or_u8_buf = |i: &'s [u8]| { + let parser = move |i| { + let none_or_u8_buf = |input: Span<'s>| { alt(( - be_u32.verify(|&v| v == 0).precedes(be_u32.map(|_| None).cut()), - be_u32.verify(|&v| v != 0).precedes(raw(context)).map(|r| Some(r.obj.to_vec())) - )).parse(i) + be_u32.verify(|&v| v == 0) + .map(|_| None) + .context("empty ttree buffer"), + raw(context) + .map(|r| Some(r.obj.to_vec())) + .context("filled ttree buffer") + )).parse(input) }; - let (i, ver) = verify(be_u16, |v| [16, 17, 18, 19].contains(v))(i)?; - let (i, tnamed) = length_value(checked_byte_count, tnamed)(i)?; - let (i, _tattline) = length_data(checked_byte_count)(i)?; - let (i, _tattfill) = length_data(checked_byte_count)(i)?; - let (i, _tattmarker) = length_data(checked_byte_count)(i)?; + let (i, ver) = be_u16.verify(|v| [16, 17, 18, 19].contains(v)).context("assertion: ttree version is in 16-19").parse(i)?; + let (i, tnamed) = length_value(checked_byte_count, tnamed).context("tnamed").complete().context("length-prefixed data").parse(i)?; + let (i, _tattline) = length_data(checked_byte_count).context("tattrline").complete().context("length-prefixed data").parse(i)?; + let (i, _tattfill) = length_data(checked_byte_count).context("tattrfill").complete().context("length-prefixed data").parse(i)?; + let (i, _tattmarker) = length_data(checked_byte_count).context("tattrmarker").complete().context("length-prefixed data").parse(i)?; let (i, fentries) = 
be_i64(i)?; let (i, ftotbytes) = be_i64(i)?; let (i, fzipbytes) = be_i64(i)?; @@ -159,35 +163,49 @@ pub fn ttree<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Tree, E> let (i, _fautosave) = be_i64(i)?; let (i, _fautoflush) = cond(ver >= 18, be_i64)(i)?; let (i, festimate) = be_i64(i)?; + // TODO change None to empty vec? let (i, _fclusterrangeend) = { if let Some(n_clst_range) = fnclusterrange { - preceded(be_u8, count(be_i64, n_clst_range as usize))(i) - .map(|(i, ends)| (i, Some(ends)))? + preceded(be_u8, count(be_i64, n_clst_range as usize)) + .context("fclusterrange end") + .map(Some) + .parse(i)? } else { (i, None) } }; let (i, _fclustersize) = { if let Some(n_clst_range) = fnclusterrange { - preceded(be_u8, count(be_i64, n_clst_range as usize))(i) - .map(|(i, ends)| (i, Some(ends)))? + preceded(be_u8, count(be_i64, n_clst_range as usize)) + .context("fcluster size") + .map(Some) + .parse(i)? } else { (i, None) } }; + let (i, fbranches) = - length_value(checked_byte_count, tobjarray(tbranch_hdr, context))(i)?; - let (i, fleaves) = length_value(checked_byte_count, - tobjarray(TLeaf::parse_from_raw, context), - )(i)?; + length_value(checked_byte_count, tobjarray(tbranch_hdr(context))) + .context("ttree branches") + .complete() + .context("length-prefixed data") + .parse(i)?; + + let (i, fleaves) = + length_value(checked_byte_count, tobjarray(TLeaf::parse(context))) + .context("ttree leaves") + .complete() + .context("length-prefixed data") + .parse(i)?; - let (i, faliases) = none_or_u8_buf(i)?; - let (i, findexvalues) = tarray(be_f64).parse(i)?; - let (i, findex) = tarray(be_i32).parse(i)?; - let (i, ftreeindex) = none_or_u8_buf(i)?; - let (i, ffriends) = none_or_u8_buf(i)?; - let (i, fuserinfo) = none_or_u8_buf(i)?; - let (i, fbranchref) = none_or_u8_buf(i)?; + let (i, faliases) = none_or_u8_buf.context("faliases").parse(i)?; + let (i, findexvalues) = tarray(be_f64).context("findexvalues").parse(i)?; + let (i, findex) = 
tarray(be_i32).context("findex").parse(i)?; + let (i, ftreeindex) = none_or_u8_buf.context("ftreeindex").parse(i)?; + let (i, ffriends) = none_or_u8_buf.context("ffriends").parse(i)?; + let (i, fuserinfo) = none_or_u8_buf.context("fuserinfo").parse(i)?; + let (i, fbranchref) = none_or_u8_buf.context("fbranchref").parse(i)?; let ftreeindex = ftreeindex.map(Pointer); let ffriends = ffriends.map(Pointer); let fuserinfo = fuserinfo.map(Pointer); @@ -220,5 +238,7 @@ pub fn ttree<'s, E>(context: &'s Context) -> impl Parser<&'s [u8], Tree, E> fbranchref, }, )) - } + }; + + parser.context("ttree") } diff --git a/root-io/tests/high_level_io.rs b/root-io/tests/high_level_io.rs index 1e2b20c..d20caca 100644 --- a/root-io/tests/high_level_io.rs +++ b/root-io/tests/high_level_io.rs @@ -56,22 +56,22 @@ fn local_paths() -> Vec { #[cfg(not(target_arch = "wasm32"))] mod local { use super::*; + use root_io::core::UnwrapPrint; #[tokio::test] async fn root_file_methods() { let paths = local_paths(); for p in paths { println!("{:?}", p); - let f = RootFile::new(p.as_path()) - .await - .expect("Failed to open file"); + let f = RootFile::new(p.as_path()).await.unwrap_print(); + f.streamer_infos().await.unwrap_print(); let mut s = String::new(); f.streamer_info_as_yaml(&mut s).await.unwrap(); f.streamer_info_as_rust(&mut s).await.unwrap(); for item in f.items() { item.name(); if item.verbose_info().contains("TTree") { - item.as_tree().await.unwrap(); + item.as_tree().await.unwrap_print(); } } } From 47ddeaf3b7e6891267cbad0b1de5f73bbe514e32 Mon Sep 17 00:00:00 2001 From: lschuetze Date: Sun, 10 Apr 2022 21:08:28 +0200 Subject: [PATCH 3/7] Bump root-io version to account for signature changes --- examples/convert_to_msgpack/Cargo.toml | 2 +- malice/Cargo.toml | 2 +- root-io/Cargo.toml | 2 +- root-ls/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/convert_to_msgpack/Cargo.toml b/examples/convert_to_msgpack/Cargo.toml index 515e966..84f6921 100644 --- 
a/examples/convert_to_msgpack/Cargo.toml +++ b/examples/convert_to_msgpack/Cargo.toml @@ -8,6 +8,6 @@ edition = "2018" alice-open-data = { version="0.5.0", path="../../alice-open-data" } malice = { version="0.3.0", path="../../malice" } rmp-serde = "0.13.7" -root-io = { version="0.3.0", path="../../root-io" } +root-io = { version="0.4", path="../../root-io" } serde = "1.0" serde_derive = "1.0" diff --git a/malice/Cargo.toml b/malice/Cargo.toml index eabcd26..8d02a92 100644 --- a/malice/Cargo.toml +++ b/malice/Cargo.toml @@ -20,7 +20,7 @@ bitflags = "1" failure = "0.1" futures = "0.3" nom = "7" -root-io = { version="0.3", path="../root-io" } +root-io = { version="0.4", path="../root-io" } # Optional dependencies alice-sys = { version="0.1", optional = true } itertools = "0.10" diff --git a/root-io/Cargo.toml b/root-io/Cargo.toml index 22c863b..9a0a83b 100644 --- a/root-io/Cargo.toml +++ b/root-io/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "root-io" -version = "0.3.1" +version = "0.4.0" authors = ["cbourjau "] exclude = ["*test_data/", "*.root"] description = "Reading of `.root` binary files which are commonly used in particle physics" diff --git a/root-ls/Cargo.toml b/root-ls/Cargo.toml index 53546ae..7f68063 100644 --- a/root-ls/Cargo.toml +++ b/root-ls/Cargo.toml @@ -14,7 +14,7 @@ edition = "2018" [dependencies] clap = "2" failure = "0.1" -root-io = { version="0.3.1", path="../root-io" } +root-io = { version="0.4", path="../root-io" } prettyplease = "0.1" syn = "1" tokio = { version = "1", features = ["full"] } From cabf9007a93560642f4dca881d6c19f5770c4815 Mon Sep 17 00:00:00 2001 From: lschuetze Date: Sun, 5 Jun 2022 22:24:04 +0200 Subject: [PATCH 4/7] Changes to make current behavior consistent with old root-io behavior --- root-io/Cargo.toml | 2 +- root-io/src/core/file.rs | 9 +++++---- root-io/src/core/tkey.rs | 10 +++++++--- root-io/src/tests/basic_io.rs | 14 ++++++++++---- root-io/src/tree_reader/branch.rs | 5 +---- root-io/tests/read_esd.rs | 7 ++++--- 6 
files changed, 28 insertions(+), 19 deletions(-) diff --git a/root-io/Cargo.toml b/root-io/Cargo.toml index 9a0a83b..702badc 100644 --- a/root-io/Cargo.toml +++ b/root-io/Cargo.toml @@ -33,7 +33,7 @@ wasm-bindgen-futures = "0.4" [target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] criterion = "0.3" -tokio = { version = "1.15", features = ["macros"] } +tokio = { version = "1.15", features = ["macros", "rt-multi-thread"] } [target.'cfg(target_arch = "wasm32")'.dev-dependencies] wasm-bindgen = "0.2" diff --git a/root-io/src/core/file.rs b/root-io/src/core/file.rs index f2a8d7d..363aebf 100644 --- a/root-io/src/core/file.rs +++ b/root-io/src/core/file.rs @@ -168,9 +168,9 @@ impl RootFile { let dir = wrap_parser(directory)(&dir_buf)?; let tkey_buf = source.fetch(dir.seek_keys, dir.n_bytes_keys as u64).await?; - let tkey_of_keys = wrap_parser(tkey.all_consuming())(&tkey_buf)?; + let tkey_of_keys = wrap_parser(tkey.all_consuming().context("root file key listing"))(&tkey_buf)?; - let keys = wrap_parser(tkey_headers.all_consuming())(&tkey_of_keys.obj)?; + let keys = wrap_parser(tkey_headers.context("root file keys"))(&tkey_of_keys.obj)?; let items = keys .iter() @@ -262,6 +262,7 @@ mod test { use std::path::Path; use super::*; + use self::UnwrapPrint; const SIMPLE_FILE_REMOTE: &str = "https://github.com/cbourjau/alice-rs/blob/master/root-io/src/test_data/simple.root?raw=true"; @@ -269,12 +270,12 @@ mod test { #[tokio::test] async fn read_cms_file_remote() { let url = "http://opendata.web.cern.ch/eos/opendata/cms/hidata/HIRun2010/HIAllPhysics/RECO/ZS-v2/0000/001DA267-7243-E011-B38F-001617C3B6CE.root"; - let f = RootFile::new(Url::parse(url).unwrap()).await.unwrap(); + let f = RootFile::new(Url::parse(url).unwrap()).await.unwrap_print(); let mut s = String::new(); f.streamer_info_as_yaml(&mut s).await.unwrap(); println!("{}", s); for item in f.items() { - item.as_tree().await.unwrap(); + item.as_tree().await.unwrap_print(); } } diff --git a/root-io/src/core/tkey.rs 
b/root-io/src/core/tkey.rs index 6edffb4..dd4622c 100644 --- a/root-io/src/core/tkey.rs +++ b/root-io/src/core/tkey.rs @@ -39,7 +39,7 @@ pub fn tkey_header<'s, E>(input: Span<'s>) -> RResult<'s, TKeyHeader, E> where E: RootError> { - tuple(( + let (i, hdr) = tuple(( be_u32.context("total size"), be_u16.context("version"), be_u32.context("uncompressed length"), @@ -64,7 +64,9 @@ pub fn tkey_header<'s, E>(input: Span<'s>) -> RResult<'s, TKeyHeader, E> class_name: class_name.to_string(), obj_name: obj_name.to_string(), obj_title: obj_title.to_string(), - }))).context("tkey header").parse(input) + }))).context("tkey header").parse(input)?; + + Ok((i, hdr)) } /// Parse a file-pointer based on the version of the file @@ -101,8 +103,10 @@ pub fn tkey<'s, E>(input: Span<'s>) -> RResult<'s, TKey, E> Ok(TKey { hdr: opthdr.take().unwrap(), obj }) }).context("tkey").parse(i) } - +// Note that tkey current /// Special thing for the keylist in the file header +// Note that tkey_headers currently does not parse the entire input buffer +// See: read_cms_file_remote for an example pub(crate) fn tkey_headers<'s, E>(input: Span<'s>) -> RResult<'s, Vec, E> where E: RootError> diff --git a/root-io/src/tests/basic_io.rs b/root-io/src/tests/basic_io.rs index 592ae71..6370519 100644 --- a/root-io/src/tests/basic_io.rs +++ b/root-io/src/tests/basic_io.rs @@ -36,18 +36,24 @@ fn list_of_rules() { s: s.to_vec(), }; - + // TODO we parse this object pretty weirdly let mut parser = wrap_parser_ctx(|ctx| move |i| { + use nom::HexDisplay; let (leftover, (name, obj)) = class_name_and_buffer(ctx).parse(i)?; - let (_, l) = tlist(ctx).parse(obj)?; - Ok((leftover, (name, l))) + let len = obj.fragment().len(); + println!("{name}: Located span of length {len}"); + println!("{}", obj.fragment().to_hex(16)); + //let (_, l) = tlist(ctx).parse(obj)?; + let (leftover, ci) = classinfo(obj)?; + println!("As classinfo: {ci:?}"); + Ok((leftover, (name, obj))) }); let (name, l) = match parser(&context) { Ok((name, 
l)) => (name, l), Err(e) => { println!("{}", e); assert!(false); unreachable!() } }; - println!("name = {}\nlist = {:?}", name, l); + //println!("name = {}\nlist = {:?}", name, l); // let (_obj, l) = tlist(obj, &context).unwrap(); // assert_eq!(l.name, "listOfRules"); // assert_eq!(l.len, 2); diff --git a/root-io/src/tree_reader/branch.rs b/root-io/src/tree_reader/branch.rs index 6ea346b..dc4d72f 100644 --- a/root-io/src/tree_reader/branch.rs +++ b/root-io/src/tree_reader/branch.rs @@ -86,7 +86,6 @@ impl TBranch { /// /// # Example /// ``` - /// extern crate failure; /// extern crate nom; /// extern crate root_io; /// use futures::StreamExt; @@ -178,13 +177,10 @@ pub(crate) fn tbranch_hdr<'s, E>(ctxt: &'s Context) -> impl RParser<'s, TBranch, let (_, branch) = match classinfo { "TBranchElement" | "TBranchObject" => { be_u16.precedes(length_value(checked_byte_count, tbranch(ctxt)).complete().context("length-prefixed data")) - .all_consuming() - .context("tbranch object") .parse(obj) } "TBranch" => tbranch(ctxt) - .all_consuming() .context("tbranch wrapper") .parse(obj), name => panic!("Unexpected Branch type {}", name), @@ -196,6 +192,7 @@ pub(crate) fn tbranch_hdr<'s, E>(ctxt: &'s Context) -> impl RParser<'s, TBranch, parser.context("tbranch hdr") } +// TODO: tbranch currently does not parse tbranch objects in its entirety (see e.g. 
fn tbranch<'s, E>(context: &'s Context) -> impl RParser<'s, TBranch, E> where E: RootError>, diff --git a/root-io/tests/read_esd.rs b/root-io/tests/read_esd.rs index 883ad5a..be09a17 100644 --- a/root-io/tests/read_esd.rs +++ b/root-io/tests/read_esd.rs @@ -134,6 +134,7 @@ mod wasm { mod x64 { use super::*; use reqwest::Url; + use root_io::core::UnwrapPrint; const REMOTE_FILE: &str = "http://opendata.web.cern.ch/eos/opendata/alice/2010/LHC10h/000139038/ESD/0001/AliESDs.root"; @@ -142,13 +143,13 @@ mod x64 { async fn read_esd_local_and_remote() { let path = alice_open_data::test_file().unwrap(); let files = [ - RootFile::new(path).await.expect("Failed to open file"), + RootFile::new(path).await.unwrap_print(), RootFile::new(Url::parse(REMOTE_FILE).unwrap()) .await - .expect("Failed to open file"), + .unwrap_print(), ]; for f in &files { - let t = f.items()[0].as_tree().await.unwrap(); + let t = f.items()[0].as_tree().await.unwrap_print(); test_branch_iterators(&t).await; } } From e8eba83858647595f69f031de895d3a5b9aea359 Mon Sep 17 00:00:00 2001 From: lschuetze Date: Tue, 7 Jun 2022 11:47:50 +0200 Subject: [PATCH 5/7] Merge upstream/master --- root-io/src/core/file.rs | 94 +++++++++++++++-------------------- root-io/src/tests/basic_io.rs | 6 +-- 2 files changed, 42 insertions(+), 58 deletions(-) diff --git a/root-io/src/core/file.rs b/root-io/src/core/file.rs index 363aebf..2597eb0 100644 --- a/root-io/src/core/file.rs +++ b/root-io/src/core/file.rs @@ -260,6 +260,7 @@ mod test { use tokio; use std::path::Path; + use crate::core::ReadError; use super::*; use self::UnwrapPrint; @@ -279,16 +280,13 @@ mod test { } } - async fn file_header_test(source: Source) { - let hdr = source - .fetch(0, FILE_HEADER_SIZE) - .await - .and_then(|buf| { - file_header(&buf) - .map_err(|_| format_err!("Failed to parse file header")) - .map(|(_i, o)| o) - }) - .unwrap(); + async fn file_header_test(source: Source) -> Result<(), ReadError> { + let buf = source.fetch(0, 
FILE_HEADER_SIZE).await?; + + let hdr = match wrap_parser(file_header)(&buf) { + Ok(hdr) => hdr, + Err(e) => return Err(ReadError::ParseError(e)) + }; let should = FileHeader { version: 60600, @@ -306,40 +304,29 @@ mod test { seek_dir: 158, }; assert_eq!(hdr, should); + + Ok(()) } #[tokio::test] async fn file_header_test_local() { let local = Source::new(Path::new("./src/test_data/simple.root")); - file_header_test(local).await; + file_header_test(local).await.unwrap_print(); } #[tokio::test] async fn file_header_test_remote() { let remote = Source::new(Url::parse(SIMPLE_FILE_REMOTE).unwrap()); - file_header_test(remote).await; + file_header_test(remote).await.unwrap_print(); } - async fn directory_test(source: Source) { - let hdr = source - .fetch(0, FILE_HEADER_SIZE) - .await - .and_then(|buf| { - file_header(&buf) - .map_err(|_| ParseError(e)) - .map(|(_i, o)| o) - }) - .unwrap(); - - let dir = source - .fetch(hdr.seek_dir, TDIRECTORY_MAX_SIZE) - .await - .and_then(|buf| { - directory(&buf) - .map_err(|e| ParseError(e)) - .map(|(_i, o)| o) - }) - .unwrap(); + async fn directory_test(source: Source) -> Result<(), ReadError> { + let hdr_buf = source.fetch(0, FILE_HEADER_SIZE).await?; + let hdr = wrap_parser(file_header)(&hdr_buf)?; + + let dir_buf = source.fetch(hdr.seek_dir, TDIRECTORY_MAX_SIZE).await?; + let dir = wrap_parser(directory)(&dir_buf)?; + assert_eq!( dir, Directory { @@ -354,30 +341,26 @@ mod test { seek_keys: 1021 } ); + + Ok(()) } #[tokio::test] async fn directory_test_local() { let local = Path::new("./src/test_data/simple.root").into(); - directory_test(local).await; + directory_test(local).await.unwrap_print(); } #[tokio::test] async fn directory_test_remote() { let remote = Source::new(Url::parse(SIMPLE_FILE_REMOTE).unwrap()); - directory_test(remote).await; + directory_test(remote).await.unwrap_print(); } - async fn streamerinfo_test(source: Source) { - let key = source - .fetch(1117, 4446) - .await - .and_then(|buf| { - tkey(&buf) - 
.map_err(|e| ParseError(e)) - .map(|(_i, o)| o) - }) - .unwrap(); + async fn streamerinfo_test(source: Source) -> Result<(), ReadError> { + let buf = source.fetch(1117, 4446).await?; + let key = wrap_parser(tkey)(&buf)?; + assert_eq!(key.hdr.obj_name, "StreamerInfo"); let key_len = key.hdr.key_len; @@ -388,22 +371,23 @@ mod test { s: key.obj, }; - match length_value(checked_byte_count, |i| { - tlist::>(i, &context) - })(&context.s) - { - Ok((_, l)) => { - assert_eq!(l.len(), 19); - } - Err(_e) => panic!("Not parsed as TList!"), - }; + let mut tlist_parser = wrap_parser_ctx(|ctx| { + length_value(checked_byte_count, move |i| { + tlist::>(&ctx).parse(i) + }).all_consuming() + }); + + let tlist = tlist_parser(&context)?; + assert_eq!(tlist.len(), 19); + + Ok(()) } #[tokio::test] async fn streamerinfo_test_local() { let local = Path::new("./src/test_data/simple.root").into(); - streamerinfo_test(local).await; + streamerinfo_test(local).await.unwrap_print(); } #[tokio::test] @@ -412,6 +396,6 @@ mod test { "https://github.com/cbourjau/alice-rs/blob/master/root-io/src/test_data/simple.root?raw=true") .unwrap() .into(); - streamerinfo_test(remote).await; + streamerinfo_test(remote).await.unwrap_print(); } } diff --git a/root-io/src/tests/basic_io.rs b/root-io/src/tests/basic_io.rs index 6370519..f389422 100644 --- a/root-io/src/tests/basic_io.rs +++ b/root-io/src/tests/basic_io.rs @@ -44,7 +44,7 @@ fn list_of_rules() { println!("{name}: Located span of length {len}"); println!("{}", obj.fragment().to_hex(16)); //let (_, l) = tlist(ctx).parse(obj)?; - let (leftover, ci) = classinfo(obj)?; + let (_leftover, ci) = classinfo(obj)?; println!("As classinfo: {ci:?}"); Ok((leftover, (name, obj))) }); @@ -53,8 +53,8 @@ fn list_of_rules() { Ok((name, l)) => (name, l), Err(e) => { println!("{}", e); assert!(false); unreachable!() } }; - //println!("name = {}\nlist = {:?}", name, l); + println!("name = {}\nlist = {:?}", name, l); // let (_obj, l) = tlist(obj, &context).unwrap(); - // 
assert_eq!(l.name, "listOfRules"); + //assert_eq!(l, "listOfRules"); // assert_eq!(l.len, 2); } From afc4e5867b9a1c7409dc82d23dbd11d3b44400cc Mon Sep 17 00:00:00 2001 From: Christian Bourjau Date: Wed, 29 Jun 2022 10:26:44 +0200 Subject: [PATCH 6/7] Run cargo fmt on this PR --- malice/src/event.rs | 4 +- root-io/src/core/compression.rs | 15 +- root-io/src/core/data_source.rs | 4 +- root-io/src/core/file.rs | 70 +++-- root-io/src/core/file_item.rs | 13 +- root-io/src/core/mod.rs | 24 +- root-io/src/core/parsers.rs | 448 +++++++++++++++++---------- root-io/src/core/tkey.rs | 97 +++--- root-io/src/core/tstreamer.rs | 230 ++++++++------ root-io/src/core/tstreamerinfo.rs | 4 +- root-io/src/core/typeid.rs | 1 - root-io/src/core/types.rs | 2 +- root-io/src/lib.rs | 1 - root-io/src/tests/basic_io.rs | 30 +- root-io/src/tree_reader/branch.rs | 85 +++-- root-io/src/tree_reader/container.rs | 18 +- root-io/src/tree_reader/leafs.rs | 57 ++-- root-io/src/tree_reader/mod.rs | 7 +- root-io/src/tree_reader/tree.rs | 85 +++-- 19 files changed, 730 insertions(+), 465 deletions(-) diff --git a/malice/src/event.rs b/malice/src/event.rs index e121274..3527115 100644 --- a/malice/src/event.rs +++ b/malice/src/event.rs @@ -7,7 +7,9 @@ use itertools::izip; use nom::{combinator::map, number::complete::*, sequence::tuple}; use wasm_bindgen::prelude::*; -use root_io::core::parsers::{Span, RResult, parse_custom_mantissa, parse_tobjarray_of_tnameds, RootError}; +use root_io::core::parsers::{ + parse_custom_mantissa, parse_tobjarray_of_tnameds, RResult, RootError, Span, +}; use root_io::stream_zip; use root_io::tree_reader::Tree; diff --git a/root-io/src/core/compression.rs b/root-io/src/core/compression.rs index b9fba0f..7d62074 100644 --- a/root-io/src/core/compression.rs +++ b/root-io/src/core/compression.rs @@ -2,9 +2,9 @@ use flate2::bufread::ZlibDecoder; use lzma_rs::xz_decompress; use thiserror::Error; -use DecompressionError::*; -use std::*; use std::io::Read; +use std::*; +use 
DecompressionError::*; #[derive(Error, Debug)] pub enum DecompressionError { @@ -21,7 +21,9 @@ pub enum DecompressionError { } pub(crate) fn decompress(input: &[u8]) -> Result, DecompressionError> { - if input.len() < 9 { return Err(InsufficientData); } + if input.len() < 9 { + return Err(InsufficientData); + } // There is something in bytes 2..=8, but we haven't identified it yet let magic = &input[..2]; @@ -45,7 +47,8 @@ pub(crate) fn decompress(input: &[u8]) -> Result, DecompressionError> { // skip leading u64 lz4_compress::decompress(&compressed[8..]).map_err(|_| Lz4Failure) } - other => Err(AlgorithmNotImplemented(String::from_utf8(other.to_vec()) - .unwrap_or(format!("Bad magic {other:?}")))) + other => Err(AlgorithmNotImplemented( + String::from_utf8(other.to_vec()).unwrap_or(format!("Bad magic {other:?}")), + )), } -} \ No newline at end of file +} diff --git a/root-io/src/core/data_source.rs b/root-io/src/core/data_source.rs index c351087..18253c6 100644 --- a/root-io/src/core/data_source.rs +++ b/root-io/src/core/data_source.rs @@ -1,6 +1,6 @@ use reqwest::{ - Client, - header::{RANGE, USER_AGENT}, Url, + header::{RANGE, USER_AGENT}, + Client, Url, }; use std::fs::File; diff --git a/root-io/src/core/file.rs b/root-io/src/core/file.rs index 2597eb0..6a1b8c0 100644 --- a/root-io/src/core/file.rs +++ b/root-io/src/core/file.rs @@ -1,18 +1,21 @@ -use nom::{self, - number::complete::{be_i16, be_i32, be_u128, be_u16, be_u32, be_u64, be_u8}, Parser}; use nom::sequence::tuple; -use nom_supreme::{ParserExt, tag::complete::tag}; +use nom::{ + self, + number::complete::{be_i16, be_i32, be_u128, be_u16, be_u32, be_u64, be_u8}, + Parser, +}; +use nom_supreme::{tag::complete::tag, ParserExt}; use uuid::Uuid; use std::fmt; +use crate::core::{ReadError, WriteError}; use crate::{ code_gen::rust::{ToNamedRustParser, ToRustStruct}, - core::*, core::tstreamer::streamers, + core::*, MAP_OFFSET, }; -use crate::core::{ReadError, WriteError}; /// Size of serialized `FileHeader` 
in bytes const FILE_HEADER_SIZE: u64 = 75; @@ -61,7 +64,10 @@ pub struct Directory { /// Parse opening part of a root file fn file_header<'s, E: RootError>>(i: Span<'s>) -> RResult<'s, FileHeader, E> { let parser = |i| { - fn version_dep_int<'s, E: RootError>>(i: Span<'s>, is_64_bit: bool) -> RResult<'s, u64, E> { + fn version_dep_int<'s, E: RootError>>( + i: Span<'s>, + is_64_bit: bool, + ) -> RResult<'s, u64, E> { if is_64_bit { be_u64(i) } else { @@ -111,8 +117,8 @@ fn file_header<'s, E: RootError>>(i: Span<'s>) -> RResult<'s, FileHeade /// Parse a file-pointer based on the version of the file fn versioned_pointer<'s, E>(version: i16) -> impl RParser<'s, u64, E> - where - E: RootError> +where + E: RootError>, { move |i| { if version > 1000 { @@ -125,22 +131,24 @@ fn versioned_pointer<'s, E>(version: i16) -> impl RParser<'s, u64, E> /// Directory within a root file; exists on ever file fn directory<'s, E>(input: Span<'s>) -> RResult<'s, Directory, E> - where - E: RootError> +where + E: RootError>, { tuple(( be_i16.context("directory version"), be_u32.context("directory time created"), be_u32.context("directory time modified"), be_i32.context("directory key byte count"), - be_i32.context("directory name byte count") - )).flat_map(make_fn(|(version, c_time, m_time, n_bytes_keys, n_bytes_name)| { - tuple(( - versioned_pointer(version).context("seek dir"), - versioned_pointer(version).context("seek parent"), - versioned_pointer(version).context("seek keys") - )).map(move |(seek_dir, seek_parent, seek_keys)| - Directory { + be_i32.context("directory name byte count"), + )) + .flat_map(make_fn( + |(version, c_time, m_time, n_bytes_keys, n_bytes_name)| { + tuple(( + versioned_pointer(version).context("seek dir"), + versioned_pointer(version).context("seek parent"), + versioned_pointer(version).context("seek keys"), + )) + .map(move |(seek_dir, seek_parent, seek_keys)| Directory { version, c_time, m_time, @@ -150,10 +158,12 @@ fn directory<'s, E>(input: Span<'s>) -> 
RResult<'s, Directory, E> seek_parent, seek_keys, }) - })).context("ROOT directory").parse(input) + }, + )) + .context("ROOT directory") + .parse(input) } - impl RootFile { /// Open a new ROOT file either from a `Url`, or from a `Path` /// (not available on `wasm32`). @@ -168,7 +178,8 @@ impl RootFile { let dir = wrap_parser(directory)(&dir_buf)?; let tkey_buf = source.fetch(dir.seek_keys, dir.n_bytes_keys as u64).await?; - let tkey_of_keys = wrap_parser(tkey.all_consuming().context("root file key listing"))(&tkey_buf)?; + let tkey_of_keys = + wrap_parser(tkey.all_consuming().context("root file key listing"))(&tkey_buf)?; let keys = wrap_parser(tkey_headers.context("root file keys"))(&tkey_of_keys.obj)?; @@ -182,10 +193,9 @@ impl RootFile { pub async fn get_streamer_context(&self) -> Result { let seek_info_len = (self.hdr.nbytes_info) as u64; - let info_key_buf = self.source - .fetch(self.hdr.seek_info, seek_info_len) - .await?; - let info_key = wrap_parser(tkey.all_consuming().context("streamer info key"))(&info_key_buf)?; + let info_key_buf = self.source.fetch(self.hdr.seek_info, seek_info_len).await?; + let info_key = + wrap_parser(tkey.all_consuming().context("streamer info key"))(&info_key_buf)?; let key_len = info_key.hdr.key_len; Ok(Context { source: self.source.clone(), @@ -259,11 +269,11 @@ mod test { use reqwest::Url; use tokio; - use std::path::Path; use crate::core::ReadError; + use std::path::Path; - use super::*; use self::UnwrapPrint; + use super::*; const SIMPLE_FILE_REMOTE: &str = "https://github.com/cbourjau/alice-rs/blob/master/root-io/src/test_data/simple.root?raw=true"; @@ -285,7 +295,7 @@ mod test { let hdr = match wrap_parser(file_header)(&buf) { Ok(hdr) => hdr, - Err(e) => return Err(ReadError::ParseError(e)) + Err(e) => return Err(ReadError::ParseError(e)), }; let should = FileHeader { @@ -374,7 +384,8 @@ mod test { let mut tlist_parser = wrap_parser_ctx(|ctx| { length_value(checked_byte_count, move |i| { tlist::>(&ctx).parse(i) - 
}).all_consuming() + }) + .all_consuming() }); let tlist = tlist_parser(&context)?; @@ -385,7 +396,6 @@ mod test { #[tokio::test] async fn streamerinfo_test_local() { - let local = Path::new("./src/test_data/simple.root").into(); streamerinfo_test(local).await.unwrap_print(); } diff --git a/root-io/src/core/file_item.rs b/root-io/src/core/file_item.rs index 652be2b..56fe1a3 100644 --- a/root-io/src/core/file_item.rs +++ b/root-io/src/core/file_item.rs @@ -1,10 +1,10 @@ use nom::multi::length_value; use nom_supreme::ParserExt; -use crate::core::{checked_byte_count, Context, Source, TKeyHeader, wrap_parser}; use crate::core::compression::decompress; use crate::core::ReadError; -use crate::tree_reader::{Tree, ttree}; +use crate::core::{checked_byte_count, wrap_parser, Context, Source, TKeyHeader}; +use crate::tree_reader::{ttree, Tree}; /// Describes a single item within this file (e.g. a `Tree`) #[derive(Debug)] @@ -62,10 +62,11 @@ impl FileItem { let ctx = self.get_context().await?; let buf = ctx.s.as_slice(); - let res = wrap_parser(length_value(checked_byte_count, ttree(&ctx)) - .complete() - .all_consuming() - .context("ttree wrapper") + let res = wrap_parser( + length_value(checked_byte_count, ttree(&ctx)) + .complete() + .all_consuming() + .context("ttree wrapper"), )(buf)?; Ok(res) } diff --git a/root-io/src/core/mod.rs b/root-io/src/core/mod.rs index 35e0915..0f5131c 100644 --- a/root-io/src/core/mod.rs +++ b/root-io/src/core/mod.rs @@ -1,11 +1,11 @@ //! This module contains the core structs and parsers needed to read //! the self-description of a root file. These parsers can be used to //! build new parsers using the [root-ls](https://github.com/cbourjau/alice-rs) cli. 
-use thiserror::Error; use crate::core::ReadError::ParseError; +use thiserror::Error; -pub(crate) use self::compression::*; pub use self::compression::DecompressionError; +pub(crate) use self::compression::*; pub use self::data_source::Source; pub use self::file::RootFile; pub use self::file_item::FileItem; @@ -16,6 +16,7 @@ pub(crate) use self::tstreamerinfo::{tstreamerinfo, TStreamerInfo}; pub(crate) use self::typeid::*; pub(crate) use self::types::*; +mod compression; mod data_source; mod file; mod file_item; @@ -25,17 +26,16 @@ mod tstreamer; mod tstreamerinfo; mod typeid; pub mod types; -mod compression; #[derive(Error, Debug)] pub enum SemanticError { #[error("Unsupported version {1} for {0:?} ({2})")] - VersionNotSupported(Component, u32, &'static str) + VersionNotSupported(Component, u32, &'static str), } #[derive(Debug)] pub enum Component { - TStreamerElement + TStreamerElement, } #[derive(Error, Debug)] @@ -54,12 +54,16 @@ pub trait UnwrapPrint { fn unwrap_print(self) -> T; } -impl UnwrapPrint for Result { +impl UnwrapPrint for Result { fn unwrap_print(self) -> T { match self { Ok(v) => v, - Err(ParseError(e)) => { panic!("Tried to unwrap a parse error:\n{}", e); }, - Err(e) => { panic!("Tried to unwrap a read error:\n{}", e) } + Err(ParseError(e)) => { + panic!("Tried to unwrap a parse error:\n{}", e); + } + Err(e) => { + panic!("Tried to unwrap a read error:\n{}", e) + } } } } @@ -75,5 +79,5 @@ pub enum WriteError { #[error(transparent)] ReadError(#[from] ReadError), #[error(transparent)] - FmtError(#[from] std::fmt::Error) -} \ No newline at end of file + FmtError(#[from] std::fmt::Error), +} diff --git a/root-io/src/core/parsers.rs b/root-io/src/core/parsers.rs index 5b3c8d2..8726f27 100644 --- a/root-io/src/core/parsers.rs +++ b/root-io/src/core/parsers.rs @@ -1,10 +1,19 @@ -use nom::{self, bytes::complete::{take, take_until}, combinator::rest, error::ParseError, IResult, multi::{count, length_data, length_value}, number::complete::{be_i32, be_u16, 
be_u32, be_u8}, Parser, sequence::{pair, tuple}}; use nom::branch::alt; use nom::combinator::cond; use nom::error::{ContextError, FromExternalError, VerboseError}; -use nom::HexDisplay; use nom::multi::length_count; +use nom::HexDisplay; use nom::Slice; +use nom::{ + self, + bytes::complete::{take, take_until}, + combinator::rest, + error::ParseError, + multi::{count, length_data, length_value}, + number::complete::{be_i32, be_u16, be_u32, be_u8}, + sequence::{pair, tuple}, + IResult, Parser, +}; use nom_locate::LocatedSpan; use nom_supreme::parser_ext::ParserExt; use nom_supreme::tag::TagError; @@ -21,26 +30,34 @@ use std::fmt::Debug; use std::result::Result::Ok; use std::str; -use crate::core::*; use crate::core::compression::DecompressionError; +use crate::core::*; -pub trait RootError: ParseError -+ ContextError -+ TagError -+ FromExternalError -+ FromExternalError -+ FromExternalError -+ FromExternalError -+ Debug {} - -impl -+ ContextError -+ TagError -+ FromExternalError -+ FromExternalError -+ FromExternalError -+ FromExternalError -+ Debug> RootError for T {} +pub trait RootError: + ParseError + + ContextError + + TagError + + FromExternalError + + FromExternalError + + FromExternalError + + FromExternalError + + Debug +{ +} + +impl< + I, + T: ParseError + + ContextError + + TagError + + FromExternalError + + FromExternalError + + FromExternalError + + FromExternalError + + Debug, + > RootError for T +{ +} pub type Span<'s> = LocatedSpan<&'s [u8]>; pub type RResult<'s, O, E> = IResult, O, E>; @@ -49,7 +66,6 @@ pub trait RParser<'s, O, E: RootError>>: Parser, O, E> {} impl<'s, O, E: RootError>, T: Parser, O, E>> RParser<'s, O, E> for T {} - /// Corerce a closure to a Fn, for use with map_res et al. 
pub(crate) fn make_fn U>(f: F) -> F { f @@ -66,44 +82,58 @@ pub struct VerboseErrorInfo { impl std::fmt::Display for VerboseErrorInfo { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> { - use nom::error::VerboseErrorKind::*; use nom::error::ErrorKind as Kind; + use nom::error::VerboseErrorKind::*; writeln!(f, "Error while parsing this block of data:")?; if self.borrow_input().len() > 0x100 { write!(f, "{}", self.borrow_input()[..0x100].to_hex(16))?; - writeln!(f, " \t[0x{:x} of 0x{:x} bytes omitted]", self.borrow_input().len() - 0x100, self.borrow_input().len())?; + writeln!( + f, + " \t[0x{:x} of 0x{:x} bytes omitted]", + self.borrow_input().len() - 0x100, + self.borrow_input().len() + )?; } else { write!(f, "{}", self.borrow_input().to_hex(16))?; } for (span, kind) in self.borrow_error().errors.iter().rev() { match kind { - Context(context) => write!(f, "\nWhile trying to parse {}:", context)?, - Char(c) => write!(f, "\nWhile trying to match a '{}':", c)?, - Nom(Kind::Verify) => continue, - Nom(Kind::Complete) => { write!(f, "\nExpected length exceeds buffer")?; continue }, - Nom(Kind::Eof) => if span.fragment().is_empty() { - // Yes, EOF is returned both for parsers expecting more input (the be_uXX - // parsers for us, mostly), but also by parsers expecting *no more* input - // such as all_consuming. - // We distinguish based on the remaining input - if everything was consumed, - // it must have been a premature EOF - write!(f, "\nUnexpected EOF")? - } else { - write!(f, "\nExpected EOF, but found excess data")? - }, - Nom(kind) => write!(f, "\nIn {:?}:", kind)? 
+ Context(context) => write!(f, "\nWhile trying to parse {}:", context)?, + Char(c) => write!(f, "\nWhile trying to match a '{}':", c)?, + Nom(Kind::Verify) => continue, + Nom(Kind::Complete) => { + write!(f, "\nExpected length exceeds buffer")?; + continue; + } + Nom(Kind::Eof) => { + if span.fragment().is_empty() { + // Yes, EOF is returned both for parsers expecting more input (the be_uXX + // parsers for us, mostly), but also by parsers expecting *no more* input + // such as all_consuming. + // We distinguish based on the remaining input - if everything was consumed, + // it must have been a premature EOF + write!(f, "\nUnexpected EOF")? + } else { + write!(f, "\nExpected EOF, but found excess data")? + } + } + Nom(kind) => write!(f, "\nIn {:?}:", kind)?, }; let fragment_begin = span.location_offset(); let fragment_end = match kind { - Context(_) | Nom(_) => span.location_offset() + std::cmp::max(1, std::cmp::min(0x100, span.fragment().len())), - Char(_) => span.location_offset() + 1 + Context(_) | Nom(_) => { + span.location_offset() + + std::cmp::max(1, std::cmp::min(0x100, span.fragment().len())) + } + Char(_) => span.location_offset() + 1, }; // Align hexdump to 16-byte blocks let hexdump_begin = fragment_begin / 16 * 16; - let hexdump_first_line_end = std::cmp::min(self.borrow_input().len(), hexdump_begin + 16); + let hexdump_first_line_end = + std::cmp::min(self.borrow_input().len(), hexdump_begin + 16); let hexdump_end = (fragment_end + 16) / 16 * 16; let hexdump_end = std::cmp::min(self.borrow_input().len(), hexdump_end); @@ -111,24 +141,56 @@ impl std::fmt::Display for VerboseErrorInfo { let fragment_begin_in_dump = 3 * (fragment_begin % 16); let fragment_end_in_dump = 3 * ((fragment_end - 1) % 16) + 1; - write!(f, "\n{}", self.borrow_input()[hexdump_begin..hexdump_first_line_end].to_hex_from(16, hexdump_begin))?; + write!( + f, + "\n{}", + self.borrow_input()[hexdump_begin..hexdump_first_line_end] + .to_hex_from(16, hexdump_begin) + )?; if 
fragment_begin == self.borrow_input().len() { - write!(f, " \t{: >skip$} [at end of input]", '^', skip=fragment_begin_in_dump + 1)?; + write!( + f, + " \t{: >skip$} [at end of input]", + '^', + skip = fragment_begin_in_dump + 1 + )?; } else if fragment_begin / 16 == fragment_end / 16 { - write!(f, " \t{: >skip$}{:~>len$}", - '^', '~', - skip = fragment_begin_in_dump + 1, - len = fragment_end_in_dump - fragment_begin_in_dump)? + write!( + f, + " \t{: >skip$}{:~>len$}", + '^', + '~', + skip = fragment_begin_in_dump + 1, + len = fragment_end_in_dump - fragment_begin_in_dump + )? } else { - write!(f, " \t{: >skip$}{:~>len$}", - '^', '~', - skip = fragment_begin_in_dump + 1, - len = (3 * 15 + 1) - fragment_begin_in_dump)?; - write!(f, "\n{}", self.borrow_input()[hexdump_begin + 16..hexdump_end].to_hex_from(16, hexdump_begin + 16))?; + write!( + f, + " \t{: >skip$}{:~>len$}", + '^', + '~', + skip = fragment_begin_in_dump + 1, + len = (3 * 15 + 1) - fragment_begin_in_dump + )?; + write!( + f, + "\n{}", + self.borrow_input()[hexdump_begin + 16..hexdump_end] + .to_hex_from(16, hexdump_begin + 16) + )?; if span.fragment().len() > 0x100 { - write!(f, " \t[0x{:x} bytes omitted]", span.fragment().len() - 0x100)?; + write!( + f, + " \t[0x{:x} bytes omitted]", + span.fragment().len() - 0x100 + )?; } else { - write!(f, " \t{:~>len$}", '~', len = fragment_end_in_dump + 1)?; + write!( + f, + " \t{:~>len$}", + '~', + len = fragment_end_in_dump + 1 + )?; } } writeln!(f)?; @@ -138,20 +200,30 @@ impl std::fmt::Display for VerboseErrorInfo { } } -fn reborrow_spans<'s, 't>(new_base: &'s [u8], error: VerboseError>) -> VerboseError> { +fn reborrow_spans<'s, 't>( + new_base: &'s [u8], + error: VerboseError>, +) -> VerboseError> { let reborrow = |span: &Span<'_>| unsafe { - Span::new_from_raw_offset(span.location_offset(), - span.location_line(), - &new_base[span.location_offset()..span.location_offset() + span.fragment().len()], - ()) + Span::new_from_raw_offset( + span.location_offset(), + 
span.location_line(), + &new_base[span.location_offset()..span.location_offset() + span.fragment().len()], + (), + ) }; VerboseError { - errors: error.errors.iter().map(|(span, kind)| (reborrow(span), kind.clone())).collect::>() + errors: error + .errors + .iter() + .map(|(span, kind)| (reborrow(span), kind.clone())) + .collect::>(), } } -pub fn wrap_parser<'s, O>(parser: impl Parser, O, VerboseError>>) -> impl FnMut(&'s [u8]) -> Result -{ +pub fn wrap_parser<'s, O>( + parser: impl Parser, O, VerboseError>>, +) -> impl FnMut(&'s [u8]) -> Result { let mut parser = parser.complete(); move |input| match parser.parse(Span::new(input)) { @@ -169,10 +241,12 @@ pub fn wrap_parser<'s, O>(parser: impl Parser, O, VerboseError } } -pub fn wrap_parser_ctx<'s, O, F, P>(parser_gen: F) -> impl FnMut(&'s Context) -> Result - where - P: Parser, O, VerboseError>>, - F: Fn(&'s Context) -> P +pub fn wrap_parser_ctx<'s, O, F, P>( + parser_gen: F, +) -> impl FnMut(&'s Context) -> Result +where + P: Parser, O, VerboseError>>, + F: Fn(&'s Context) -> P, { move |ctx| match parser_gen(ctx).complete().parse(ctx.span()) { Ok((_, parsed)) => Ok(parsed), @@ -197,32 +271,42 @@ fn is_byte_count(v: &u32) -> bool { /// Return the size in bytes of the following object in the input. The /// count is the remainder of this object minus the size of the count. 
pub fn checked_byte_count<'s, E>(input: Span<'s>) -> RResult<'s, u32, E> - where - E: RootError>, +where + E: RootError>, { - be_u32.verify(is_byte_count).context("assertion: byte count matches bytecount mask") + be_u32 + .verify(is_byte_count) + .context("assertion: byte count matches bytecount mask") .map(|v| v & !Flags::BYTE_COUNT_MASK.bits()) - .verify(|&v| v != 0).context("assertion: byte count must not be 0") - .verify(|&v| v < 0x8000_0000).context("assertion: highest bit in byte count must be unset") + .verify(|&v| v != 0) + .context("assertion: byte count must not be 0") + .verify(|&v| v < 0x8000_0000) + .context("assertion: highest bit in byte count must be unset") .parse(input) } /// Read ROOT's string length prefix, which is usually a u8, but can be extended /// to a u32 (for a total of 5 bytes) if the first byte is 255 fn string_length_prefix<'s, E>(input: Span<'s>) -> RResult<'s, u32, E> - where - E: RootError>, +where + E: RootError>, { alt(( - be_u8.verify(|&v| v == 255).precedes(be_u32).context("extended string length prefix"), - be_u8.verify(|&v| v != 255).map(|v| v as u32).context("short string length prefix") + be_u8 + .verify(|&v| v == 255) + .precedes(be_u32) + .context("extended string length prefix"), + be_u8 + .verify(|&v| v != 255) + .map(|v| v as u32) + .context("short string length prefix"), ))(input) } /// Read ROOT's version of short and long strings (preceeded by u8). Does not read null terminated! 
pub fn string<'s, E>(input: Span<'s>) -> RResult<'s, &'s str, E> - where - E: RootError>, +where + E: RootError>, { length_data(string_length_prefix) .map_res(|s| str::from_utf8(&s)) @@ -232,33 +316,46 @@ pub fn string<'s, E>(input: Span<'s>) -> RResult<'s, &'s str, E> /// Parser for the most basic of ROOT types pub fn tobject<'s, E>(input: Span<'s>) -> RResult<'s, TObject, E> - where - E: RootError>, +where + E: RootError>, { tuple(( be_u16.context("tobject version"), be_u32.context("tobject id"), - be_u32.context("tobject flags") - .map(|v| TObjectFlags::from_bits_truncate(v | TObjectFlags::IS_ON_HEAP.bits())) + be_u32 + .context("tobject flags") + .map(|v| TObjectFlags::from_bits_truncate(v | TObjectFlags::IS_ON_HEAP.bits())), )) - .flat_map(make_fn(|(ver, id, bits): (u16, u32, TObjectFlags)| - cond(bits.intersects(TObjectFlags::IS_REFERENCED), be_u16.context("tobject reference")) - .map(move |_ref| TObject { ver, id, bits, _ref }))) - .parse(input) + .flat_map(make_fn(|(ver, id, bits): (u16, u32, TObjectFlags)| { + cond( + bits.intersects(TObjectFlags::IS_REFERENCED), + be_u16.context("tobject reference"), + ) + .map(move |_ref| TObject { + ver, + id, + bits, + _ref, + }) + })) + .parse(input) } /// Parse a `TList` pub fn tlist<'s, E>(ctx: &'s Context) -> impl RParser<'s, Vec>, E> - where - E: RootError>, +where + E: RootError>, { let parser = move |inpt| { - let (i, _ver) = be_u16.context("tlist version") - .verify(|&v| v == 5).context("assertion: tlist version must be 5").parse(inpt)?; + let (i, _ver) = be_u16 + .context("tlist version") + .verify(|&v| v == 5) + .context("assertion: tlist version must be 5") + .parse(inpt)?; let (i, (_tobj, _name, num_obj)) = tuple(( tobject.context("tlist object header"), string.context("tlist name"), - be_i32.context("tlist element count") + be_i32.context("tlist element count"), ))(i)?; let (i, objs) = count( @@ -285,35 +382,36 @@ pub fn tlist<'s, E>(ctx: &'s Context) -> impl RParser<'s, Vec>, E> /// Parser for `TNamed` 
objects #[rustfmt::skip::macros(do_parse)] pub fn tnamed<'s, E>(input: Span<'s>) -> RResult<'s, TNamed, E> - where - E: RootError>, +where + E: RootError>, { tuple(( be_u16.context("version"), tobject.context("object header"), string.context("name"), - string.context("title") - )).context("named tobject") - .map(|(_, _, name, title)| TNamed { name: name.to_string(), title: title.to_string() }) - .parse(input) + string.context("title"), + )) + .context("named tobject") + .map(|(_, _, name, title)| TNamed { + name: name.to_string(), + title: title.to_string(), + }) + .parse(input) } /// Parse a `TObjArray` pub fn tobjarray<'s, E, P, O>(parser: P) -> impl RParser<'s, Vec, E> - where - P: RParser<'s, O, E> + Copy, - E: RootError>, +where + P: RParser<'s, O, E> + Copy, + E: RootError>, { - let parser = move |i| { + let parser = move |i| { let (i, _ver) = be_u16(i)?; let (i, _tobj) = tobject(i)?; let (i, _name) = c_string(i)?; let (i, size) = be_i32(i)?; let (i, _low) = be_i32(i)?; - let (i, objs): (_, Vec) = count( - parser, - size as usize, - )(i)?; + let (i, objs): (_, Vec) = count(parser, size as usize)(i)?; Ok((i, objs)) }; @@ -322,53 +420,59 @@ pub fn tobjarray<'s, E, P, O>(parser: P) -> impl RParser<'s, Vec, E> /// Parse a `TObjArray` which does not have references pointing outside of the input buffer pub fn tobjarray_no_context<'s, E>(input: Span<'s>) -> RResult<'s, Vec<(ClassInfo, Span<'s>)>, E> - where - E: RootError>, +where + E: RootError>, { tuple(( be_u16.context("TObjArray header version"), tobject.context("TObjArray object header"), c_string.context("TObjArray name"), be_i32.context("TObjArray num objects"), - be_i32.context("TObjArray unknown") - )).flat_map(make_fn(|(_, _, _, num_objects, _): (u16, TObject, &str, i32, i32)| - count(raw_no_context, num_objects.try_into().unwrap()))) - .context("TObjArray") - .parse(input) + be_i32.context("TObjArray unknown"), + )) + .flat_map(make_fn( + |(_, _, _, num_objects, _): (u16, TObject, &str, i32, i32)| { + 
count(raw_no_context, num_objects.try_into().unwrap()) + }, + )) + .context("TObjArray") + .parse(input) // |v| v.into_iter().map(|(ci, s)| (ci, s)).collect()) >> } pub fn tobjstring<'s, E>(input: Span<'s>) -> RResult<'s, &'s str, E> - where - E: RootError>, +where + E: RootError>, { // TODO move all_consuming to call site tuple(( be_u16.context("tobjstring version"), tobject.context("tobjstring object"), string.context("tobjstring name"), - )).all_consuming() - .context("tobjstring") - .map(|(_, _, name)| name) - .parse(input) + )) + .all_consuming() + .context("tobjstring") + .map(|(_, _, name)| name) + .parse(input) } /// Parse a so-called `TArray`. Note that ROOT's `TArray`s are actually not fixed size. /// Example usage for TArrayI: `tarray(nom::complete::be_i32).parse(input_slice)` pub fn tarray<'s, E, F, O>(parser: F) -> impl RParser<'s, Vec, E> - where - F: RParser<'s, O, E>, - E: RootError>, +where + F: RParser<'s, O, E>, + E: RootError>, { length_count(be_u32, parser).context("tarray") } /// Parse a null terminated string pub fn c_string<'s, E>(input: Span<'s>) -> RResult<'s, &str, E> - where - E: RootError>, +where + E: RootError>, { - take_until(b"\x00".as_ref()).terminated(be_u8.verify(|&v| v == 0)) + take_until(b"\x00".as_ref()) + .terminated(be_u8.verify(|&v| v == 0)) .map_res(|s: Span| str::from_utf8(&s)) .context("c string") .parse(input) @@ -379,8 +483,8 @@ pub fn c_string<'s, E>(input: Span<'s>) -> RResult<'s, &str, E> /// buffer.This is modeled after ROOT's `TBufferFile::ReadObjectAny` and /// `TBufferFile::ReadClass` pub fn classinfo<'s, E>(input: Span<'s>) -> RResult<'s, ClassInfo, E> - where - E: RootError>, +where + E: RootError>, { let (i, tag) = alt(( be_u32 @@ -389,13 +493,17 @@ pub fn classinfo<'s, E>(input: Span<'s>) -> RResult<'s, ClassInfo, E> be_u32 .verify(|&v| is_byte_count(&v) && v != Flags::NEW_CLASSTAG.bits()) .context("class info: class tag preceded by byte count") - .precedes(be_u32) - )).parse(input)?; - + .precedes(be_u32), 
+ )) + .parse(input)?; match tag as u32 { - 0xFFFF_FFFF => { // new classtag mask - c_string.map(ClassInfo::New).context("new classtag").parse(i) + 0xFFFF_FFFF => { + // new classtag mask + c_string + .map(ClassInfo::New) + .context("new classtag") + .parse(i) } tag => { if Flags::from_bits_truncate(tag).contains(Flags::CLASS_MASK) { @@ -408,8 +516,8 @@ pub fn classinfo<'s, E>(input: Span<'s>) -> RResult<'s, ClassInfo, E> } pub fn class_name<'s, E>(ctx: &'s Context) -> impl RParser<'s, &'s str, E> - where - E: RootError> +where + E: RootError>, { let parser = move |i| { let ctx_offset = u32::try_from(ctx.offset) @@ -422,7 +530,9 @@ pub fn class_name<'s, E>(ctx: &'s Context) -> impl RParser<'s, &'s str, E> let abs_offset = tag & !Flags::CLASS_MASK.bits(); // TODO handle insufficient buffer length, abs_offset < ctx_offset let s = ctx.span().slice(((abs_offset - ctx_offset) as usize)..); - let (_, name) = class_name(ctx).context("pre-existing class name").parse(s)?; + let (_, name) = class_name(ctx) + .context("pre-existing class name") + .parse(s)?; Ok((i, name)) } ClassInfo::References(tag) => { @@ -432,7 +542,9 @@ pub fn class_name<'s, E>(ctx: &'s Context) -> impl RParser<'s, &'s str, E> } else { // TODO as above let s = ctx.span().slice(((abs_offset - ctx_offset) as usize)..); - let (_, name) = class_name(ctx).context("reference to class name").parse(s)?; + let (_, name) = class_name(ctx) + .context("reference to class name") + .parse(s)?; Ok((i, name)) } } @@ -447,8 +559,8 @@ pub fn class_name<'s, E>(ctx: &'s Context) -> impl RParser<'s, &'s str, E> /// `Context`, though, which may not be available. If so, have a look /// at the `classinfo` parser. 
pub fn class_name_and_buffer<'s, E>(ctx: &'s Context) -> impl RParser<'s, (&'s str, Span<'s>), E> - where - E: RootError>, +where + E: RootError>, { let parser = move |i| { let ctx_offset = u32::try_from(ctx.offset) @@ -456,7 +568,10 @@ pub fn class_name_and_buffer<'s, E>(ctx: &'s Context) -> impl RParser<'s, (&'s s let (i, ci) = classinfo(i)?; Ok(match ci { ClassInfo::New(s) => { - let (i, buf) = length_value(checked_byte_count, rest).complete().context("length-prefixed data").parse(i)?; + let (i, buf) = length_value(checked_byte_count, rest) + .complete() + .context("length-prefixed data") + .parse(i)?; (i, (s, buf)) } ClassInfo::Exists(tag) => { @@ -464,10 +579,15 @@ pub fn class_name_and_buffer<'s, E>(ctx: &'s Context) -> impl RParser<'s, (&'s s let abs_offset = tag & !Flags::CLASS_MASK.bits(); // TODO handle insufficient buffer length, abs_offset < ctx_offset let s = ctx.span().slice(((abs_offset - ctx_offset) as usize)..); - let (_, name) = class_name(ctx).context("pre-existing class name").parse(s)?; + let (_, name) = class_name(ctx) + .context("pre-existing class name") + .parse(s)?; name }; - let (i, buf) = length_value(checked_byte_count, rest).complete().context("length-prefixed data").parse(i)?; + let (i, buf) = length_value(checked_byte_count, rest) + .complete() + .context("length-prefixed data") + .parse(i)?; (i, (name, buf)) } ClassInfo::References(tag) => { @@ -479,7 +599,9 @@ pub fn class_name_and_buffer<'s, E>(ctx: &'s Context) -> impl RParser<'s, (&'s s } else { // TODO as above let s = ctx.span().slice(((abs_offset - ctx_offset) as usize)..); - let (_, (name, buf)) = class_name_and_buffer(ctx).context("reference to class").parse(s)?; + let (_, (name, buf)) = class_name_and_buffer(ctx) + .context("reference to class") + .parse(s)?; (name, buf) } }; @@ -493,19 +615,18 @@ pub fn class_name_and_buffer<'s, E>(ctx: &'s Context) -> impl RParser<'s, (&'s s /// Parse a `Raw` chunk from the given input buffer. 
This is useful when one does not /// know the exact type at the time of parsing pub fn raw<'s, E>(context: &'s Context) -> impl RParser<'s, Raw<'s>, E> - where - E: RootError>, +where + E: RootError>, { - class_name_and_buffer(context) - .map(|(classinfo, obj)| Raw { classinfo, obj }) + class_name_and_buffer(context).map(|(classinfo, obj)| Raw { classinfo, obj }) } /// Same as `raw` but doesn't require a `Context` as input. Panics if /// a `Context` is required to parse the underlying buffer (i.e., the /// given buffer contains a reference to some other part of the file. pub fn raw_no_context<'s, E>(input: Span<'s>) -> RResult<'s, (ClassInfo, Span<'s>), E> - where - E: RootError>, +where + E: RootError>, { use super::ClassInfo::*; let parser = |input| { @@ -514,7 +635,11 @@ pub fn raw_no_context<'s, E>(input: Span<'s>) -> RResult<'s, (ClassInfo, Span<'s match ci { // point to beginning of slice References(0) => take(0usize).map(|o| (ci, o)).parse(input), - New(_) | Exists(_) => length_data(checked_byte_count).complete().context("length-prefixed data").map(|o| (ci, o)).parse(input), + New(_) | Exists(_) => length_data(checked_byte_count) + .complete() + .context("length-prefixed data") + .map(|o| (ci, o)) + .parse(input), // If its a reference to any other thing but 0 it needs a context _ => panic!("Object needs context!"), } @@ -529,11 +654,14 @@ pub fn raw_no_context<'s, E>(input: Span<'s>) -> RResult<'s, (ClassInfo, Span<'s /// as an `TObjArray` of `TNamed` objects for each event. 
This breaks /// it down to a simple vector pub fn parse_tobjarray_of_tnameds<'s, E>(input: Span<'s>) -> RResult<'s, Vec, E> - where - E: RootError>, +where + E: RootError>, { // each element of the tobjarray has a Vec - let (input, vals) = length_value(checked_byte_count, tobjarray_no_context).complete().context("length-prefixed array").parse(input)?; + let (input, vals) = length_value(checked_byte_count, tobjarray_no_context) + .complete() + .context("length-prefixed array") + .parse(input)?; let strings = vals .into_iter() .map(|(ci, el)| { @@ -552,17 +680,19 @@ pub fn parse_tobjarray_of_tnameds<'s, E>(input: Span<'s>) -> RResult<'s, Vec(input: Span<'s>, nbits: usize) -> RResult<'s, f32, E> - where - E: RootError>, +where + E: RootError>, { // TODO: Use ByteOrder crate to be cross-platform? - pair(be_u8, be_u16).map(|(exp, man)| { - let mut s = u32::from(exp); - // Move the exponent into the last 23 bits - s <<= 23; - s |= (u32::from(man) & ((1 << (nbits + 1)) - 1)) << (23 - nbits); - f32::from_bits(s) - }).parse(input) + pair(be_u8, be_u16) + .map(|(exp, man)| { + let mut s = u32::from(exp); + // Move the exponent into the last 23 bits + s <<= 23; + s |= (u32::from(man) & ((1 << (nbits + 1)) - 1)) << (23 - nbits); + f32::from_bits(s) + }) + .parse(input) } #[cfg(test)] diff --git a/root-io/src/core/tkey.rs b/root-io/src/core/tkey.rs index dd4622c..ccc3377 100644 --- a/root-io/src/core/tkey.rs +++ b/root-io/src/core/tkey.rs @@ -1,12 +1,12 @@ -use nom::*; use nom::bytes::complete::take; use nom::multi::length_count; use nom::number::complete::{be_i16, be_u16, be_u32, be_u64}; use nom::sequence::tuple; +use nom::*; use nom_supreme::ParserExt; -use crate::core::*; use crate::core::compression::{decompress, DecompressionError}; +use crate::core::*; #[derive(Debug, Clone)] #[allow(dead_code)] @@ -36,8 +36,8 @@ pub struct TKey { /// Usually, TKeys are followed up by their content, but there is one "index" in every /// root file where only the TKey headers are stored 
for faster later `Seek`ing pub fn tkey_header<'s, E>(input: Span<'s>) -> RResult<'s, TKeyHeader, E> - where - E: RootError> +where + E: RootError>, { let (i, hdr) = tuple(( be_u32.context("total size"), @@ -45,34 +45,44 @@ pub fn tkey_header<'s, E>(input: Span<'s>) -> RResult<'s, TKeyHeader, E> be_u32.context("uncompressed length"), be_u32.context("datime"), be_i16.context("key length"), - be_i16.context("cycle") - )).flat_map(make_fn(|(total_size, version, uncomp_len, datime, key_len, cycle)| tuple(( - seek_point(version).context("seek key"), - seek_point(version).context("seek pdir"), - string.context("class name"), - string.context("object name"), - string.context("object title") - )).map(move |(seek_key, seek_pdir, class_name, obj_name, obj_title)| TKeyHeader { - total_size, - version, - uncomp_len, - datime, - key_len, - cycle, - seek_key, - seek_pdir, - class_name: class_name.to_string(), - obj_name: obj_name.to_string(), - obj_title: obj_title.to_string(), - }))).context("tkey header").parse(input)?; + be_i16.context("cycle"), + )) + .flat_map(make_fn( + |(total_size, version, uncomp_len, datime, key_len, cycle)| { + tuple(( + seek_point(version).context("seek key"), + seek_point(version).context("seek pdir"), + string.context("class name"), + string.context("object name"), + string.context("object title"), + )) + .map( + move |(seek_key, seek_pdir, class_name, obj_name, obj_title)| TKeyHeader { + total_size, + version, + uncomp_len, + datime, + key_len, + cycle, + seek_key, + seek_pdir, + class_name: class_name.to_string(), + obj_name: obj_name.to_string(), + obj_title: obj_title.to_string(), + }, + ) + }, + )) + .context("tkey header") + .parse(input)?; Ok((i, hdr)) } /// Parse a file-pointer based on the version of the file fn seek_point<'s, E>(version: u16) -> impl RParser<'s, u64, E> - where - E: RootError> +where + E: RootError>, { move |i| { if version > 1000 { @@ -85,8 +95,8 @@ fn seek_point<'s, E>(version: u16) -> impl RParser<'s, u64, E> /// Parse 
a full TKey including its payload pub fn tkey<'s, E>(input: Span<'s>) -> RResult<'s, TKey, E> - where - E: RootError> +where + E: RootError>, { let (i, hdr) = tkey_header.parse(input)?; let buflen = hdr.total_size - hdr.key_len as u32; @@ -94,22 +104,31 @@ pub fn tkey<'s, E>(input: Span<'s>) -> RResult<'s, TKey, E> let mut opthdr = Some(hdr); - take(buflen).map_res::<_, _, DecompressionError>(|buf: Span| { - let obj = if uncomp_len as usize > buf.len() { - decompress(&buf)? - } else { - buf.to_vec() - }; - Ok(TKey { hdr: opthdr.take().unwrap(), obj }) - }).context("tkey").parse(i) + take(buflen) + .map_res::<_, _, DecompressionError>(|buf: Span| { + let obj = if uncomp_len as usize > buf.len() { + decompress(&buf)? + } else { + buf.to_vec() + }; + Ok(TKey { + hdr: opthdr.take().unwrap(), + obj, + }) + }) + .context("tkey") + .parse(i) } // Note that tkey current /// Special thing for the keylist in the file header // Note that tkey_headers currently does not parse the entire input buffer // See: read_cms_file_remote for an example pub(crate) fn tkey_headers<'s, E>(input: Span<'s>) -> RResult<'s, Vec, E> - where - E: RootError> +where + E: RootError>, { - length_count(be_u32, tkey_header).complete().context("count-prefixed data").parse(input) + length_count(be_u32, tkey_header) + .complete() + .context("count-prefixed data") + .parse(input) } diff --git a/root-io/src/core/tstreamer.rs b/root-io/src/core/tstreamer.rs index b3bac29..4b96522 100644 --- a/root-io/src/core/tstreamer.rs +++ b/root-io/src/core/tstreamer.rs @@ -1,18 +1,18 @@ -use nom::Parser; use nom::multi::{count, length_count, length_value}; use nom::number::complete::{be_i32, be_u16, be_u32}; use nom::sequence::{pair, tuple}; +use nom::Parser; use nom_supreme::ParserExt; use quote::*; use std::fmt::Debug; +use crate::core::SemanticError::VersionNotSupported; use crate::{ code_gen::rust::{ToRustParser, ToRustType}, code_gen::utils::{alias_or_lifetime, sanitize, type_is_core}, core::*, }; -use 
crate::core::SemanticError::VersionNotSupported; /// Union of all posible `TStreamers`. See figure at /// @@ -98,94 +98,122 @@ pub(crate) struct TStreamerElement { /// Parse a `TStreamer` from a `Raw` buffer. This is usually the case /// after reading the `TList` of `TStreamerInfo`s from a ROOT file pub(crate) fn tstreamer<'s, E>(ctx: &'s Context) -> impl RParser<'s, TStreamer, E> + Copy - where - E: RootError>, +where + E: RootError>, { let parser = move |i| { let (i, (classinfo, obj)) = class_name_and_buffer(ctx).parse(i)?; - let wrapped_tstreamerelem = length_value(checked_byte_count, tstreamerelement); let (_, streamer) = match classinfo { "TStreamerBase" => tuple(( be_u16.context("version"), wrapped_tstreamerelem, - be_i32.context("version base") - )).map(|(_ver, el, version_base)| TStreamer::Base { el, version_base }) - .all_consuming().context("tstreamer (base)").parse(obj), - - "TStreamerBasicType" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::BasicType { el }) - .all_consuming().context("tstreamer (basic type)").parse(obj), + be_i32.context("version base"), + )) + .map(|(_ver, el, version_base)| TStreamer::Base { el, version_base }) + .all_consuming() + .context("tstreamer (base)") + .parse(obj), + + "TStreamerBasicType" => pair(be_u16.context("version"), wrapped_tstreamerelem) + .map(|(_ver, el)| TStreamer::BasicType { el }) + .all_consuming() + .context("tstreamer (basic type)") + .parse(obj), "TStreamerBasicPointer" => tuple(( be_u16.context("version"), wrapped_tstreamerelem, be_i32.context("cvers"), string.context("cname"), - string.context("ccls") - )).map(|(_ver, el, cvers, cname, ccls)| TStreamer::BasicPointer { el, cvers, cname: cname.to_string(), ccls: ccls.to_string() }) - .all_consuming().context("tstreamer (basic pointer)").parse(obj), + string.context("ccls"), + )) + .map(|(_ver, el, cvers, cname, ccls)| TStreamer::BasicPointer { + el, + cvers, + cname: cname.to_string(), + ccls: 
ccls.to_string(), + }) + .all_consuming() + .context("tstreamer (basic pointer)") + .parse(obj), "TStreamerLoop" => tuple(( be_u16.context("version"), wrapped_tstreamerelem, be_i32.context("cvers"), string.context("cname"), - string.context("ccls") - )).map(|(_ver, el, cvers, cname, ccls)| TStreamer::Loop { el, cvers, cname: cname.to_string(), ccls: ccls.to_string() }) - .all_consuming().context("tstreamer (loop)").parse(obj), - - "TStreamerObject" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::Object { el }) - .all_consuming().context("tstreamer (object)").parse(obj), - - "TStreamerObjectPointer" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::ObjectPointer { el }) - .all_consuming().context("tstreamer (object pointer)").parse(obj), - - "TStreamerObjectAny" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::ObjectAny { el }) - .all_consuming().context("tstreamer (object (any))").parse(obj), - - "TStreamerObjectAnyPointer" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::ObjectAnyPointer { el }) - .all_consuming().context("tstreamer (object pointer (any))").parse(obj), - - "TStreamerString" => pair( - be_u16.context("version"), - wrapped_tstreamerelem, - ).map(|(_ver, el)| TStreamer::String { el }) - .all_consuming().context("tstreamer (string)").parse(obj), + string.context("ccls"), + )) + .map(|(_ver, el, cvers, cname, ccls)| TStreamer::Loop { + el, + cvers, + cname: cname.to_string(), + ccls: ccls.to_string(), + }) + .all_consuming() + .context("tstreamer (loop)") + .parse(obj), + + "TStreamerObject" => pair(be_u16.context("version"), wrapped_tstreamerelem) + .map(|(_ver, el)| TStreamer::Object { el }) + .all_consuming() + .context("tstreamer (object)") + .parse(obj), + + "TStreamerObjectPointer" => pair(be_u16.context("version"), wrapped_tstreamerelem) + .map(|(_ver, el)| 
TStreamer::ObjectPointer { el }) + .all_consuming() + .context("tstreamer (object pointer)") + .parse(obj), + + "TStreamerObjectAny" => pair(be_u16.context("version"), wrapped_tstreamerelem) + .map(|(_ver, el)| TStreamer::ObjectAny { el }) + .all_consuming() + .context("tstreamer (object (any))") + .parse(obj), + + "TStreamerObjectAnyPointer" => pair(be_u16.context("version"), wrapped_tstreamerelem) + .map(|(_ver, el)| TStreamer::ObjectAnyPointer { el }) + .all_consuming() + .context("tstreamer (object pointer (any))") + .parse(obj), + + "TStreamerString" => pair(be_u16.context("version"), wrapped_tstreamerelem) + .map(|(_ver, el)| TStreamer::String { el }) + .all_consuming() + .context("tstreamer (string)") + .parse(obj), "TStreamerSTL" => tuple(( be_u16.context("version"), wrapped_tstreamerelem, be_i32.map(StlTypeID::new).context("vtype"), - be_i32.map_res(TypeId::new).context("ctype") - )).map(|(_ver, el, vtype, ctype)| TStreamer::Stl { el, vtype, ctype }) - .all_consuming().context("tstreamer (stl)").parse(obj), + be_i32.map_res(TypeId::new).context("ctype"), + )) + .map(|(_ver, el, vtype, ctype)| TStreamer::Stl { el, vtype, ctype }) + .all_consuming() + .context("tstreamer (stl)") + .parse(obj), "TStreamerSTLstring" => { // Two version bcs `stlstring` derives from `stl` - be_u16.precedes(length_value(checked_byte_count, tuple(( - be_u16.context("version"), - wrapped_tstreamerelem, - be_i32.map(StlTypeID::new).context("vtype"), - be_i32.map_res(TypeId::new).context("ctype") - )))).map(|(_ver, el, vtype, ctype)| TStreamer::StlString { el, vtype, ctype }) - .all_consuming().context("tstreamer (stl string)").parse(obj) + be_u16 + .precedes(length_value( + checked_byte_count, + tuple(( + be_u16.context("version"), + wrapped_tstreamerelem, + be_i32.map(StlTypeID::new).context("vtype"), + be_i32.map_res(TypeId::new).context("ctype"), + )), + )) + .map(|(_ver, el, vtype, ctype)| TStreamer::StlString { el, vtype, ctype }) + .all_consuming() + .context("tstreamer (stl 
string)") + .parse(obj) } ci => unimplemented!("Unknown TStreamer {}", ci), }?; @@ -198,8 +226,8 @@ pub(crate) fn tstreamer<'s, E>(ctx: &'s Context) -> impl RParser<'s, TStreamer, /// Return all `TSreamerInfo` for the data in this file pub fn streamers<'s, E>(ctx: &'s Context) -> impl RParser<'s, Vec, E> + 's - where - E: RootError>, +where + E: RootError>, { let parser = move |i| { // Dunno why we are 4 bytes off with the size of the streamer info... @@ -214,7 +242,12 @@ pub fn streamers<'s, E>(ctx: &'s Context) -> impl RParser<'s, Vec "TStreamerInfo" => Some(raw.obj), _ => None, }) - .map(|buf| Ok(tstreamerinfo(ctx).context("in streamers listing").parse(buf)?.1)) + .map(|buf| { + Ok(tstreamerinfo(ctx) + .context("in streamers listing") + .parse(buf)? + .1) + }) .collect(); let streamers = _streamers?; @@ -238,9 +271,9 @@ pub fn streamers<'s, E>(ctx: &'s Context) -> impl RParser<'s, Vec for raw in tlist_objs { match raw.classinfo { "TStreamerInfo" | "TList" => {} - other => println!("Got unexpected class in streamers list: {other}") + other => println!("Got unexpected class in streamers list: {other}"), } - }; + } Ok((i, streamers)) }; @@ -250,39 +283,54 @@ pub fn streamers<'s, E>(ctx: &'s Context) -> impl RParser<'s, Vec /// The element which is wrapped in a TStreamer fn tstreamerelement<'s, E>(input: Span<'s>) -> RResult<'s, TStreamerElement, E> - where - E: RootError>, +where + E: RootError>, { - tuple(( be_u16.context("version"), length_value(checked_byte_count, tnamed).context("name"), be_i32.map_res(TypeId::new).context("element type"), be_i32.context("size"), be_i32.context("array length"), - be_i32.context("array dimensions") - )).flat_map(make_fn(|(ver, name, el_type, size, array_len, array_dim): (u16, TNamed, TypeId, i32, i32, i32)| { - let mut optname = Some(name); - tuple(( - move |i| if ver == 1 { length_count(be_u32, be_u32)(i) } else { count(be_u32, 5)(i) }, - string, - )).map_res(move |(max_idx, type_name)| { - if ver <= 3 { - 
Err(VersionNotSupported(Component::TStreamerElement, ver as u32, "must be >= 4")) - } else { - Ok(TStreamerElement { - ver, - name: optname.take().unwrap(), - el_type, - size, - array_len, - array_dim, - max_idx, - type_name: type_name.to_string(), - }) - } - }) - })).context("tstreamer element").parse(input) + be_i32.context("array dimensions"), + )) + .flat_map(make_fn( + |(ver, name, el_type, size, array_len, array_dim): (u16, TNamed, TypeId, i32, i32, i32)| { + let mut optname = Some(name); + tuple(( + move |i| { + if ver == 1 { + length_count(be_u32, be_u32)(i) + } else { + count(be_u32, 5)(i) + } + }, + string, + )) + .map_res(move |(max_idx, type_name)| { + if ver <= 3 { + Err(VersionNotSupported( + Component::TStreamerElement, + ver as u32, + "must be >= 4", + )) + } else { + Ok(TStreamerElement { + ver, + name: optname.take().unwrap(), + el_type, + size, + array_len, + array_dim, + max_idx, + type_name: type_name.to_string(), + }) + } + }) + }, + )) + .context("tstreamer element") + .parse(input) } impl TStreamer { diff --git a/root-io/src/core/tstreamerinfo.rs b/root-io/src/core/tstreamerinfo.rs index c271895..b026e84 100644 --- a/root-io/src/core/tstreamerinfo.rs +++ b/root-io/src/core/tstreamerinfo.rs @@ -22,8 +22,8 @@ pub struct TStreamerInfo { /// Parse one `TStreamerInfo` object (as found in the `TList`) pub(crate) fn tstreamerinfo<'s, E>(context: &'s Context) -> impl RParser<'s, TStreamerInfo, E> - where - E: RootError>, +where + E: RootError>, { let parser = move |i| { let parse_members = tobjarray(tstreamer(context)).context("tstreamerinfo members"); diff --git a/root-io/src/core/typeid.rs b/root-io/src/core/typeid.rs index 8ed99ae..4ea4d36 100644 --- a/root-io/src/core/typeid.rs +++ b/root-io/src/core/typeid.rs @@ -21,7 +21,6 @@ pub enum InvalidPrimitive { Array(i32), } - /// Integer ID describing a streamed type in a `TStreamer` #[derive(Debug, Clone, Copy)] pub(crate) enum TypeId { diff --git a/root-io/src/core/types.rs 
b/root-io/src/core/types.rs index 189440a..aa1c5ae 100644 --- a/root-io/src/core/types.rs +++ b/root-io/src/core/types.rs @@ -26,7 +26,7 @@ bitflags! { /// Used in `TStreamerInfo` /// Describes if the following entry is a new class or if it was /// already described. -#[derive(Clone,Copy,Debug)] +#[derive(Clone, Copy, Debug)] pub enum ClassInfo<'s> { /// Class name of the new class New(&'s str), diff --git a/root-io/src/lib.rs b/root-io/src/lib.rs index b69be4d..d3eecae 100644 --- a/root-io/src/lib.rs +++ b/root-io/src/lib.rs @@ -14,7 +14,6 @@ //! files as easy as possible. If you are looking for a particular //! parser chances have it that it exists but it is not marked as `pub`. #![feature(negative_impls)] - #![allow(clippy::cognitive_complexity)] #![recursion_limit = "256"] extern crate alice_open_data; diff --git a/root-io/src/tests/basic_io.rs b/root-io/src/tests/basic_io.rs index f389422..3fe0f55 100644 --- a/root-io/src/tests/basic_io.rs +++ b/root-io/src/tests/basic_io.rs @@ -37,21 +37,27 @@ fn list_of_rules() { }; // TODO we parse this object pretty weirdly - let mut parser = wrap_parser_ctx(|ctx| move |i| { - use nom::HexDisplay; - let (leftover, (name, obj)) = class_name_and_buffer(ctx).parse(i)?; - let len = obj.fragment().len(); - println!("{name}: Located span of length {len}"); - println!("{}", obj.fragment().to_hex(16)); - //let (_, l) = tlist(ctx).parse(obj)?; - let (_leftover, ci) = classinfo(obj)?; - println!("As classinfo: {ci:?}"); - Ok((leftover, (name, obj))) + let mut parser = wrap_parser_ctx(|ctx| { + move |i| { + use nom::HexDisplay; + let (leftover, (name, obj)) = class_name_and_buffer(ctx).parse(i)?; + let len = obj.fragment().len(); + println!("{name}: Located span of length {len}"); + println!("{}", obj.fragment().to_hex(16)); + //let (_, l) = tlist(ctx).parse(obj)?; + let (_leftover, ci) = classinfo(obj)?; + println!("As classinfo: {ci:?}"); + Ok((leftover, (name, obj))) + } }); let (name, l) = match parser(&context) { - Ok((name, l)) 
=> (name, l), - Err(e) => { println!("{}", e); assert!(false); unreachable!() } + Ok((name, l)) => (name, l), + Err(e) => { + println!("{}", e); + assert!(false); + unreachable!() + } }; println!("name = {}\nlist = {:?}", name, l); // let (_obj, l) = tlist(obj, &context).unwrap(); diff --git a/root-io/src/tree_reader/branch.rs b/root-io/src/tree_reader/branch.rs index dc4d72f..45ae0cb 100644 --- a/root-io/src/tree_reader/branch.rs +++ b/root-io/src/tree_reader/branch.rs @@ -1,5 +1,10 @@ use futures::prelude::*; -use nom::{error::VerboseError, IResult, multi::{count, length_data, length_value}, number::complete::*, Parser}; +use nom::{ + error::VerboseError, + multi::{count, length_data, length_value}, + number::complete::*, + IResult, Parser, +}; use nom_supreme::ParserExt; use std::fmt::Debug; @@ -112,9 +117,9 @@ impl TBranch { /// }).await; ///# } /// ``` - pub fn as_fixed_size_iterator(&self, p: P) -> impl Stream - where - P: Fn(Span) -> IResult>, + pub fn as_fixed_size_iterator(&self, p: P) -> impl Stream + where + P: Fn(Span) -> IResult>, { stream::iter(self.containers().to_owned()) .then(|basket| async move { basket.raw_data().await.unwrap() }) @@ -168,21 +173,21 @@ impl TBranch { /// `TBranchElements` are a subclass of `TBranch` if the content is an Object /// We ignore the extra information for now and just parse the TBranch"Header" in either case pub(crate) fn tbranch_hdr<'s, E>(ctxt: &'s Context) -> impl RParser<'s, TBranch, E> + Copy - where - E: RootError>, +where + E: RootError>, { let parser = move |i| { let (i, (classinfo, obj)) = class_name_and_buffer(ctxt).parse(i)?; let (_, branch) = match classinfo { - "TBranchElement" | "TBranchObject" => { - be_u16.precedes(length_value(checked_byte_count, tbranch(ctxt)).complete().context("length-prefixed data")) - .parse(obj) - } - "TBranch" => - tbranch(ctxt) - .context("tbranch wrapper") - .parse(obj), + "TBranchElement" | "TBranchObject" => be_u16 + .precedes( + length_value(checked_byte_count, 
tbranch(ctxt)) + .complete() + .context("length-prefixed data"), + ) + .parse(obj), + "TBranch" => tbranch(ctxt).context("tbranch wrapper").parse(obj), name => panic!("Unexpected Branch type {}", name), }?; @@ -194,13 +199,21 @@ pub(crate) fn tbranch_hdr<'s, E>(ctxt: &'s Context) -> impl RParser<'s, TBranch, // TODO: tbranch currently does not parse tbranch objects in its entirety (see e.g. fn tbranch<'s, E>(context: &'s Context) -> impl RParser<'s, TBranch, E> - where - E: RootError>, +where + E: RootError>, { let parser = move |inpt| { - let (i, _ver) = be_u16.verify(|v| [11, 12].contains(v)).context("assertion: branch version must be 11 or 12").parse(inpt)?; - let (i, tnamed) = length_value(checked_byte_count, tnamed).complete().context("tnamed").parse(i)?; - let (i, _tattfill) = length_data(checked_byte_count).context("tattrfill").parse(i)?; + let (i, _ver) = be_u16 + .verify(|v| [11, 12].contains(v)) + .context("assertion: branch version must be 11 or 12") + .parse(inpt)?; + let (i, tnamed) = length_value(checked_byte_count, tnamed) + .complete() + .context("tnamed") + .parse(i)?; + let (i, _tattfill) = length_data(checked_byte_count) + .context("tattrfill") + .parse(i)?; let (i, fcompress) = be_i32(i)?; let (i, fbasketsize) = be_i32(i)?; let (i, fentryoffsetlen) = be_i32(i)?; @@ -213,23 +226,31 @@ fn tbranch<'s, E>(context: &'s Context) -> impl RParser<'s, TBranch, E> let (i, ffirstentry) = be_i64(i)?; let (i, ftotbytes) = be_i64(i)?; let (i, fzipbytes) = be_i64(i)?; - let (i, fbranches) = - length_value(checked_byte_count, tobjarray(tbranch_hdr(context))).complete().context("fbranches").parse(i)?; - let (i, fleaves) = - length_value(checked_byte_count, tobjarray(TLeaf::parse(context))).complete().context("fleaves").parse(i)?; - let (i, fbaskets) = - length_value(checked_byte_count, - tobjarray(|i| class_name_and_buffer(context).map(|(_, buf)| buf).parse(i))) - .complete() - .context("fbaskets") - .parse(i)?; - let (i, fbasketbytes) = 
be_u8.precedes(count(be_i32, fmaxbaskets as usize)) + let (i, fbranches) = length_value(checked_byte_count, tobjarray(tbranch_hdr(context))) + .complete() + .context("fbranches") + .parse(i)?; + let (i, fleaves) = length_value(checked_byte_count, tobjarray(TLeaf::parse(context))) + .complete() + .context("fleaves") + .parse(i)?; + let (i, fbaskets) = length_value( + checked_byte_count, + tobjarray(|i| class_name_and_buffer(context).map(|(_, buf)| buf).parse(i)), + ) + .complete() + .context("fbaskets") + .parse(i)?; + let (i, fbasketbytes) = be_u8 + .precedes(count(be_i32, fmaxbaskets as usize)) .context("fbasketbytes") .parse(i)?; - let (i, fbasketentry) = be_u8.precedes(count(be_i64, fmaxbaskets as usize)) + let (i, fbasketentry) = be_u8 + .precedes(count(be_i64, fmaxbaskets as usize)) .context("fbasketentry") .parse(i)?; - let (i, fbasketseek) = be_u8.precedes(count(be_u64, fmaxbaskets as usize)) + let (i, fbasketseek) = be_u8 + .precedes(count(be_u64, fmaxbaskets as usize)) .context("fbasketseek") .parse(i)?; let (i, ffilename) = string.context("ffilename").parse(i)?; diff --git a/root-io/src/tree_reader/container.rs b/root-io/src/tree_reader/container.rs index cefbd2c..ad9adb0 100644 --- a/root-io/src/tree_reader/container.rs +++ b/root-io/src/tree_reader/container.rs @@ -1,11 +1,11 @@ -use nom::*; use nom::combinator::rest; use nom::number::complete::{be_i8, be_u16, be_u32}; use nom::sequence::tuple; +use nom::*; use nom_supreme::ParserExt; -use crate::core::*; use crate::core::ReadError; +use crate::core::*; #[derive(Debug, Clone)] pub(crate) enum Container { @@ -15,7 +15,6 @@ pub(crate) enum Container { OnDisk(Source, u64, u64), } - impl Container { /// Return the number of entries and the data; reading it from disk if necessary pub(crate) async fn raw_data<'s>(self) -> Result<(u32, Vec), ReadError> { @@ -40,8 +39,8 @@ impl Container { /// Return a tuple indicating the number of elements in this basket /// and the content as a Vec fn tbasket2vec<'s, 
E>(input: Span<'s>) -> RResult<'s, (u32, Vec), E> - where - E : RootError> +where + E: RootError>, { tuple(( tkey_header.context("header"), @@ -51,8 +50,9 @@ fn tbasket2vec<'s, E>(input: Span<'s>) -> RResult<'s, (u32, Vec), E> be_u32.context("number of entries in buffer"), be_u32.context("last"), be_i8.context("flags"), - rest.context("buffer") - )).map_res::<_, _, DecompressionError>(|(hdr, _, _, _, n_entry_buf, last, _, buf)| { + rest.context("buffer"), + )) + .map_res::<_, _, DecompressionError>(|(hdr, _, _, _, n_entry_buf, last, _, buf)| { let buf = if hdr.uncomp_len as usize > buf.len() { decompress(&buf)? } else { @@ -63,7 +63,9 @@ fn tbasket2vec<'s, E>(input: Span<'s>) -> RResult<'s, (u32, Vec), E> // whereby we have to take the key_len into account... let useful_bytes = (last - hdr.key_len as u32) as usize; Ok((n_entry_buf, buf.as_slice()[..useful_bytes].to_vec())) - }).context("tbasket2vec").parse(input) + }) + .context("tbasket2vec") + .parse(input) } #[cfg(test)] diff --git a/root-io/src/tree_reader/leafs.rs b/root-io/src/tree_reader/leafs.rs index adfd80a..f5a1ad8 100644 --- a/root-io/src/tree_reader/leafs.rs +++ b/root-io/src/tree_reader/leafs.rs @@ -1,6 +1,6 @@ -use nom::{combinator::verify, multi::length_value, number::complete::*, Parser}; use nom::branch::alt; use nom::sequence::tuple; +use nom::{combinator::verify, multi::length_value, number::complete::*, Parser}; use nom_supreme::ParserExt; use quote::{Ident, Tokens}; @@ -10,9 +10,7 @@ use crate::{code_gen::rust::ToRustType, core::*}; /// Parse a bool from a big endian u8 fn be_bool<'s, E: RootError>>(i: Span<'s>) -> RResult<'s, bool, E> { - let (i, byte) = be_u8 - .verify(|&byte| byte == 0 || byte == 1) - .parse(i)?; + let (i, byte) = be_u8.verify(|&byte| byte == 0 || byte == 1).parse(i)?; Ok((i, byte == 1)) } @@ -24,11 +22,9 @@ pub struct TLeaf { impl TLeaf { // A helper function to get around some lifetime issues on the caller sider - pub(crate) fn parse<'s, E>( - ctxt: &'s Context, - ) -> 
impl RParser<'s, Self, E> + Copy - where - E: RootError>, + pub(crate) fn parse<'s, E>(ctxt: &'s Context) -> impl RParser<'s, Self, E> + Copy + where + E: RootError>, { let parser = move |i| { let (i, (classinfo, obj)) = class_name_and_buffer(ctxt).parse(i)?; @@ -56,8 +52,8 @@ enum TLeafVariant { impl TLeafVariant { fn parse<'s, E>(context: &'s Context, classinfo: &'s str, i: Span<'s>) -> RResult<'s, Self, E> - where - E: RootError>, + where + E: RootError>, { match classinfo { "TLeafB" => TLeafB::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafB(l))), @@ -69,7 +65,9 @@ impl TLeafVariant { "TLeafC" => TLeafC::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafC(l))), "TLeafO" => TLeafO::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafO(l))), "TLeafD32" => TLeafD32::parse(i, context).map(|(i, l)| (i, TLeafVariant::TLeafD32(l))), - "TLeafElement" => TLeafElement::parse(context).map(TLeafVariant::TLeafElement).parse(i), + "TLeafElement" => TLeafElement::parse(context) + .map(TLeafVariant::TLeafElement) + .parse(i), name => unimplemented!("Unexpected Leaf type {}", name), } } @@ -95,8 +93,7 @@ macro_rules! 
make_tleaf_variant { { // All known descendens have version 1 let (i, _) = verify(be_u16, |&ver| ver == 1)(input)?; - let (i, base) = - length_value(checked_byte_count, TLeafBase::parse(context))(i)?; + let (i, base) = length_value(checked_byte_count, TLeafBase::parse(context))(i)?; let (i, fminimum) = $parser(i)?; let (i, fmaximum) = $parser(i)?; let obj = Self { @@ -143,16 +140,20 @@ struct TLeafElement { impl TLeafElement { fn parse<'s, E>(context: &'s Context) -> impl RParser<'s, Self, E> - where - E: RootError>, + where + E: RootError>, { - be_u16.verify(|&ver| ver == 1).precedes( - tuple(( - length_value(checked_byte_count, TLeafBase::parse(context)), - be_i32, - be_i32.map_res(TypeId::new) - )).map(make_fn(|(base, fid, ftype)| Self { base, fid, ftype })) - ).context("TLeaf") + be_u16 + .verify(|&ver| ver == 1) + .precedes( + tuple(( + length_value(checked_byte_count, TLeafBase::parse(context)), + be_i32, + be_i32.map_res(TypeId::new), + )) + .map(make_fn(|(base, fid, ftype)| Self { base, fid, ftype })), + ) + .context("TLeaf") } } @@ -179,8 +180,8 @@ struct TLeafBase { impl TLeafBase { fn parse<'s, E>(context: &'s Context) -> impl RParser<'s, Self, E> - where - E: RootError>, + where + E: RootError>, { move |i| { let (i, ver) = be_u16(i)?; @@ -193,9 +194,9 @@ impl TLeafBase { let (i, fleafcount) = { alt(( be_u32.verify(|&v| v == 0).map(|_| None), - TLeaf::parse(context) - .map(|TLeaf { variant }| Some(Box::new(variant))) - )).parse(i)? + TLeaf::parse(context).map(|TLeaf { variant }| Some(Box::new(variant))), + )) + .parse(i)? }; Ok(( i, diff --git a/root-io/src/tree_reader/mod.rs b/root-io/src/tree_reader/mod.rs index d8a6568..a2102ba 100644 --- a/root-io/src/tree_reader/mod.rs +++ b/root-io/src/tree_reader/mod.rs @@ -4,7 +4,7 @@ //! several elements per collision. This module provides two Iterator //! structs in order to iterate over these columns (`TBranches` in //! ROOT lingo). 
-pub use self::tree::{Tree, ttree}; +pub use self::tree::{ttree, Tree}; mod branch; mod container; @@ -20,13 +20,10 @@ mod tests { use crate::core::RootFile; use crate::core::UnwrapPrint; - #[tokio::test] async fn simple_tree() { let path = PathBuf::from("./src/test_data/simple.root"); - let f = RootFile::new(path.as_path()) - .await - .unwrap_print(); + let f = RootFile::new(path.as_path()).await.unwrap_print(); f.items()[0].as_tree().await.unwrap_print(); } } diff --git a/root-io/src/tree_reader/tree.rs b/root-io/src/tree_reader/tree.rs index 25b5b19..e9ec8d4 100644 --- a/root-io/src/tree_reader/tree.rs +++ b/root-io/src/tree_reader/tree.rs @@ -1,10 +1,10 @@ use nom::branch::alt; use nom::combinator::cond; -use nom::multi::{count, length_value}; use nom::multi::length_data; +use nom::multi::{count, length_value}; use nom::number::complete::{be_f64, be_i32, be_i64, be_u16, be_u32, be_u8}; -use nom::Parser; use nom::sequence::preceded; +use nom::Parser; use nom_supreme::ParserExt; use thiserror::Error; @@ -13,8 +13,8 @@ use std::fmt::Debug; use std::ops::Deref; use crate::{ - core::parsers::*, core::types::*, tree_reader::branch::TBranch, - tree_reader::branch::tbranch_hdr, tree_reader::leafs::TLeaf, + core::parsers::*, core::types::*, tree_reader::branch::tbranch_hdr, + tree_reader::branch::TBranch, tree_reader::leafs::TLeaf, }; /// `TTree` potentially has members with very large `Vec` buffers @@ -116,36 +116,61 @@ impl<'s> Tree { self.branches() .into_iter() .find(|b| b.name == name) - .ok_or_else(|| MissingBranch(name.to_string(), - self.branches() - .iter() - .map(|b| b.name.to_string()) - .collect::>())) + .ok_or_else(|| { + MissingBranch( + name.to_string(), + self.branches() + .iter() + .map(|b| b.name.to_string()) + .collect::>(), + ) + }) } } /// Parse a `Tree` from the given buffer. Usually used through `FileItem::parse_with`. 
#[allow(clippy::unnecessary_unwrap)] pub fn ttree<'s, E>(context: &'s Context) -> impl RParser<'s, Tree, E> - where - E: RootError>, +where + E: RootError>, { let parser = move |i| { let none_or_u8_buf = |input: Span<'s>| { alt(( - be_u32.verify(|&v| v == 0) + be_u32 + .verify(|&v| v == 0) .map(|_| None) .context("empty ttree buffer"), raw(context) .map(|r| Some(r.obj.to_vec())) - .context("filled ttree buffer") - )).parse(input) + .context("filled ttree buffer"), + )) + .parse(input) }; - let (i, ver) = be_u16.verify(|v| [16, 17, 18, 19].contains(v)).context("assertion: ttree version is in 16-19").parse(i)?; - let (i, tnamed) = length_value(checked_byte_count, tnamed).context("tnamed").complete().context("length-prefixed data").parse(i)?; - let (i, _tattline) = length_data(checked_byte_count).context("tattrline").complete().context("length-prefixed data").parse(i)?; - let (i, _tattfill) = length_data(checked_byte_count).context("tattrfill").complete().context("length-prefixed data").parse(i)?; - let (i, _tattmarker) = length_data(checked_byte_count).context("tattrmarker").complete().context("length-prefixed data").parse(i)?; + let (i, ver) = be_u16 + .verify(|v| [16, 17, 18, 19].contains(v)) + .context("assertion: ttree version is in 16-19") + .parse(i)?; + let (i, tnamed) = length_value(checked_byte_count, tnamed) + .context("tnamed") + .complete() + .context("length-prefixed data") + .parse(i)?; + let (i, _tattline) = length_data(checked_byte_count) + .context("tattrline") + .complete() + .context("length-prefixed data") + .parse(i)?; + let (i, _tattfill) = length_data(checked_byte_count) + .context("tattrfill") + .complete() + .context("length-prefixed data") + .parse(i)?; + let (i, _tattmarker) = length_data(checked_byte_count) + .context("tattrmarker") + .complete() + .context("length-prefixed data") + .parse(i)?; let (i, fentries) = be_i64(i)?; let (i, ftotbytes) = be_i64(i)?; let (i, fzipbytes) = be_i64(i)?; @@ -185,19 +210,17 @@ pub fn ttree<'s, 
E>(context: &'s Context) -> impl RParser<'s, Tree, E> } }; - let (i, fbranches) = - length_value(checked_byte_count, tobjarray(tbranch_hdr(context))) - .context("ttree branches") - .complete() - .context("length-prefixed data") - .parse(i)?; + let (i, fbranches) = length_value(checked_byte_count, tobjarray(tbranch_hdr(context))) + .context("ttree branches") + .complete() + .context("length-prefixed data") + .parse(i)?; - let (i, fleaves) = - length_value(checked_byte_count, tobjarray(TLeaf::parse(context))) - .context("ttree leaves") - .complete() - .context("length-prefixed data") - .parse(i)?; + let (i, fleaves) = length_value(checked_byte_count, tobjarray(TLeaf::parse(context))) + .context("ttree leaves") + .complete() + .context("length-prefixed data") + .parse(i)?; let (i, faliases) = none_or_u8_buf.context("faliases").parse(i)?; let (i, findexvalues) = tarray(be_f64).context("findexvalues").parse(i)?; From 0b849c686420bd8a4a298958ff0645952ee168ef Mon Sep 17 00:00:00 2001 From: Christian Bourjau Date: Wed, 29 Jun 2022 10:27:52 +0200 Subject: [PATCH 7/7] Remove unused unstable feature --- root-io/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/root-io/src/lib.rs b/root-io/src/lib.rs index d3eecae..d61dbfa 100644 --- a/root-io/src/lib.rs +++ b/root-io/src/lib.rs @@ -13,7 +13,6 @@ //! The API surface is deliberately small to make the processing of said //! files as easy as possible. If you are looking for a particular //! parser chances have it that it exists but it is not marked as `pub`. -#![feature(negative_impls)] #![allow(clippy::cognitive_complexity)] #![recursion_limit = "256"] extern crate alice_open_data;