Skip to content

Commit

Permalink
Adds additional tooling APIs to enable Ion 1.1 support in the CLI's `…
Browse files Browse the repository at this point in the history
…inspect` command (#802)

* Adds EncodedBinaryValue trait
* Rename ExpandedValueSource::EExp to SingletonEExp
* Makes raw iterators and MacroExpansion `Copy`, adds `peek_next()`
* Adds RawReaderState type, simplifies singleton eexp annotations
* Adds ExpandedStreamItem, is_ephemeral
* Adds value completeness check for text eexp ID, bin values
* improved formatting for InvalidInputError message
* No digits after float `e` is `incomplete`, improves error fmt
* Fixes incompleteness check for float/decimal digits after dot
  • Loading branch information
zslayton authored Aug 13, 2024
1 parent bc2e976 commit d4ad0c5
Show file tree
Hide file tree
Showing 30 changed files with 886 additions and 209 deletions.
32 changes: 21 additions & 11 deletions src/lazy/any_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::raw_stream_item::LazyRawStreamItem;
use crate::lazy::raw_value_ref::RawValueRef;
use crate::lazy::span::Span;
use crate::lazy::streaming_raw_reader::RawReaderState;
use crate::lazy::text::raw::r#struct::{
LazyRawTextFieldName_1_0, LazyRawTextStruct_1_0, RawTextStructIterator_1_0,
};
Expand Down Expand Up @@ -599,20 +600,20 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> {
}
}

fn stream_data(&self) -> (&'data [u8], usize, IonEncoding) {
fn save_state(&self) -> RawReaderState<'data> {
use RawReaderKind::*;
let (remaining_data, stream_offset, mut encoding) = match &self.encoding_reader {
Text_1_0(r) => r.stream_data(),
Binary_1_0(r) => r.stream_data(),
Text_1_1(r) => r.stream_data(),
Binary_1_1(r) => r.stream_data(),
let reader_state = match &self.encoding_reader {
Text_1_0(r) => r.save_state(),
Binary_1_0(r) => r.save_state(),
Text_1_1(r) => r.save_state(),
Binary_1_1(r) => r.save_state(),
};
// If we hit an IVM that changed the encoding but we haven't changed our reader yet,
// we still want to report the new encoding.
if let Some(new_encoding) = self.new_encoding {
encoding = new_encoding;
return RawReaderState::new(reader_state.data(), reader_state.offset(), new_encoding);
}
(remaining_data, stream_offset, encoding)
reader_state
}

fn next<'top>(
Expand All @@ -625,9 +626,12 @@ impl<'data> LazyRawReader<'data, AnyEncoding> for LazyRawAnyReader<'data> {
// If we previously ran into an IVM that changed the stream encoding, replace our reader
// with one that can read the new encoding.
if let Some(new_encoding) = self.new_encoding.take() {
let (remaining_data, stream_offset, _) = self.stream_data();
let new_encoding_reader =
RawReaderKind::resume_at_offset(remaining_data, stream_offset, new_encoding);
let reader_state = self.save_state();
let new_encoding_reader = RawReaderKind::resume_at_offset(
reader_state.data(),
reader_state.offset(),
new_encoding,
);
self.encoding_reader = new_encoding_reader;
}

Expand Down Expand Up @@ -1149,10 +1153,12 @@ impl<'top> LazyContainerPrivate<'top, AnyEncoding> for LazyRawAnyList<'top> {
}
}

#[derive(Debug, Copy, Clone)]
pub struct RawAnyListIterator<'data> {
encoding: RawAnyListIteratorKind<'data>,
}

#[derive(Debug, Copy, Clone)]
pub enum RawAnyListIteratorKind<'data> {
Text_1_0(RawTextListIterator_1_0<'data>),
Binary_1_0(RawBinarySequenceIterator_1_0<'data>),
Expand Down Expand Up @@ -1310,10 +1316,12 @@ impl<'data> LazyContainerPrivate<'data, AnyEncoding> for LazyRawAnySExp<'data> {
}
}

#[derive(Debug, Copy, Clone)]
pub struct RawAnySExpIterator<'data> {
encoding: RawAnySExpIteratorKind<'data>,
}

#[derive(Debug, Copy, Clone)]
pub enum RawAnySExpIteratorKind<'data> {
Text_1_0(RawTextSExpIterator_1_0<'data>),
Binary_1_0(RawBinarySequenceIterator_1_0<'data>),
Expand Down Expand Up @@ -1513,10 +1521,12 @@ impl<'top> From<LazyRawBinaryFieldName_1_1<'top>> for LazyRawAnyFieldName<'top>
}
}

#[derive(Debug, Copy, Clone)]
pub struct RawAnyStructIterator<'data> {
encoding: RawAnyStructIteratorKind<'data>,
}

#[derive(Debug, Copy, Clone)]
pub enum RawAnyStructIteratorKind<'data> {
Text_1_0(RawTextStructIterator_1_0<'data>),
Binary_1_0(RawBinaryStructIterator_1_0<'data>),
Expand Down
6 changes: 4 additions & 2 deletions src/lazy/binary/raw/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use crate::{Encoding, IonResult};

use crate::lazy::any_encoding::IonEncoding;
use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::streaming_raw_reader::RawReaderState;

/// A binary Ion 1.0 reader that yields [`LazyRawBinaryValue_1_0`]s representing the top level values found
/// in the provided input stream.
Expand Down Expand Up @@ -118,9 +119,9 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader_1_0
}
}

fn stream_data(&self) -> (&'data [u8], usize, IonEncoding) {
fn save_state(&self) -> RawReaderState<'data> {
let stream_offset = self.position();
(
RawReaderState::new(
&self.data.buffer.bytes()[self.data.bytes_to_skip..],
stream_offset,
IonEncoding::Binary_1_0,
Expand Down Expand Up @@ -148,6 +149,7 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_0> for LazyRawBinaryReader_1_0

/// Wraps an [`ImmutableBuffer`], allowing the reader to advance each time an item is successfully
/// parsed from it.
#[derive(Debug, Copy, Clone)]
pub(crate) struct DataSource<'data> {
// The buffer we're reading from
buffer: ImmutableBuffer<'data>,
Expand Down
1 change: 1 addition & 0 deletions src/lazy/binary/raw/sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ impl<'a> Debug for LazyRawBinarySequence_1_0<'a> {
}
}

#[derive(Debug, Copy, Clone)]
pub struct RawBinarySequenceIterator_1_0<'top> {
source: DataSource<'top>,
}
Expand Down
1 change: 1 addition & 0 deletions src/lazy/binary/raw/struct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ impl<'top> LazyRawStruct<'top, BinaryEncoding_1_0> for LazyRawBinaryStruct_1_0<'
}
}

#[derive(Debug, Copy, Clone)]
pub struct RawBinaryStructIterator_1_0<'top> {
source: DataSource<'top>,
}
Expand Down
6 changes: 4 additions & 2 deletions src/lazy/binary/raw/v1_1/e_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -281,8 +281,9 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> {
let expr = EExpArgExpr::ArgGroup(BinaryEExpArgGroup::new(parameter, input, 0));
(EExpArg::new(parameter, expr), self.remaining_args_buffer)
}
// If it's a tagged value expression, parse it as usual.
// It's a single expression; we'll need to look at the parameter's declared encoding.
ArgGrouping::ValueExprLiteral => match parameter.encoding() {
// The encoding starts with an opcode.
ParameterEncoding::Tagged => {
let (expr, remaining) = try_or_some_err! {
self
Expand All @@ -291,6 +292,7 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> {
};
(EExpArg::new(parameter, expr), remaining)
}
// It's a FlexUInt.
ParameterEncoding::FlexUInt => {
let (flex_uint_lazy_value, remaining) = try_or_some_err! {
self.remaining_args_buffer.read_flex_uint_as_lazy_value()
Expand All @@ -304,7 +306,7 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> {
EExpArg::new(parameter, EExpArgExpr::ValueLiteral(value_ref)),
remaining,
)
}
} // TODO: The other tagless encodings
},
// If it's an argument group...
ArgGrouping::ArgGroup => {
Expand Down
13 changes: 10 additions & 3 deletions src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ impl<'a> ImmutableBuffer<'a> {
}
// XXX: This *doesn't* slice `self` because FlexUInt::read() is faster if the input
// is at least the size of a u64.
let matched_input = self;
let matched_input = self.slice(0, size_in_bytes);
let remaining_input = self.slice_to_end(size_in_bytes);
let value = LazyRawBinaryValue_1_1::for_flex_uint(matched_input);
Ok((value, remaining_input))
Expand Down Expand Up @@ -622,7 +622,7 @@ impl<'a> ImmutableBuffer<'a> {
let header = opcode
.to_header()
.ok_or_else(|| IonError::decoding_error("found a non-value in value position .."))?;

let header_offset = input.offset();
let (total_length, length_length, value_body_length, delimited_contents) = if opcode.is_delimited_start() {
let (contents, after) = input.peek_delimited_container(opcode)?;
let total_length = after.offset() - self.offset();
Expand All @@ -646,10 +646,17 @@ impl<'a> ImmutableBuffer<'a> {
(total_length, length_length, value_length, DelimitedContents::None)
};

let header_offset = input.offset();
if total_length > input.len() {
return IonResult::incomplete(
"the stream ended unexpectedly in the middle of a value",
header_offset,
);
}

let encoded_value = EncodedValue {
encoding: ParameterEncoding::Tagged,
header,
// If applicable, these are populated by the caller: `read_annotated_value()`
annotations_header_length: 0,
annotations_sequence_length: 0,
annotations_encoding: AnnotationsEncoding::SymbolAddress,
Expand Down
5 changes: 3 additions & 2 deletions src/lazy/binary/raw/v1_1/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::lazy::encoder::private::Sealed;
use crate::lazy::encoding::BinaryEncoding_1_1;
use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::raw_stream_item::{EndPosition, LazyRawStreamItem, RawStreamItem};
use crate::lazy::streaming_raw_reader::RawReaderState;
use crate::{Encoding, IonResult};

pub struct LazyRawBinaryReader_1_1<'data> {
Expand Down Expand Up @@ -107,8 +108,8 @@ impl<'data> LazyRawReader<'data, BinaryEncoding_1_1> for LazyRawBinaryReader_1_1
Self::new_with_offset(data, offset)
}

fn stream_data(&self) -> (&'data [u8], usize, IonEncoding) {
(
fn save_state(&self) -> RawReaderState<'data> {
RawReaderState::new(
&self.input[self.local_offset..],
self.position(),
self.encoding(),
Expand Down
58 changes: 41 additions & 17 deletions src/lazy/binary/raw/v1_1/sequence.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,16 @@ pub struct LazyRawBinarySExp_1_1<'top> {

impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinaryList_1_1<'top> {
fn from_value(value: &'top LazyRawBinaryValue_1_1<'top>) -> Self {
let delimited_expr_cache = match value.delimited_contents {
DelimitedContents::None => None,
DelimitedContents::Values(values) => Some(values),
DelimitedContents::Fields(_) => unreachable!("sequence contained fields"),
};
LazyRawBinaryList_1_1 {
sequence: LazyRawBinarySequence_1_1 { value },
sequence: LazyRawBinarySequence_1_1 {
value,
delimited_expr_cache,
},
}
}
}
Expand Down Expand Up @@ -52,8 +60,16 @@ impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinaryList_1_1<'

impl<'top> LazyContainerPrivate<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<'top> {
fn from_value(value: &'top LazyRawBinaryValue_1_1<'top>) -> Self {
let delimited_expr_cache = match value.delimited_contents {
DelimitedContents::None => None,
DelimitedContents::Values(values) => Some(values),
DelimitedContents::Fields(_) => unreachable!("sequence contained fields"),
};
LazyRawBinarySExp_1_1 {
sequence: LazyRawBinarySequence_1_1 { value },
sequence: LazyRawBinarySequence_1_1 {
value,
delimited_expr_cache,
},
}
}
}
Expand Down Expand Up @@ -83,11 +99,18 @@ impl<'top> LazyRawSequence<'top, BinaryEncoding_1_1> for LazyRawBinarySExp_1_1<'
#[derive(Copy, Clone)]
pub struct LazyRawBinarySequence_1_1<'top> {
pub(crate) value: &'top LazyRawBinaryValue_1_1<'top>,
pub(crate) delimited_expr_cache: Option<&'top [LazyRawValueExpr<'top, BinaryEncoding_1_1>]>,
}

impl<'top> LazyRawBinarySequence_1_1<'top> {
pub fn new(value: &'top LazyRawBinaryValue_1_1<'top>) -> Self {
Self { value }
pub fn new(
value: &'top LazyRawBinaryValue_1_1<'top>,
delimited_expr_cache: Option<&'top [LazyRawValueExpr<'top, BinaryEncoding_1_1>]>,
) -> Self {
Self {
value,
delimited_expr_cache,
}
}

pub fn ion_type(&self) -> IonType {
Expand All @@ -102,7 +125,7 @@ impl<'top> LazyRawBinarySequence_1_1<'top> {
} else {
self.value.value_body_buffer()
};
RawBinarySequenceIterator_1_1::new(buffer_slice, self.value.delimited_contents)
RawBinarySequenceIterator_1_1::new(buffer_slice, self.delimited_expr_cache)
}
}

Expand Down Expand Up @@ -139,26 +162,24 @@ impl<'a> Debug for LazyRawBinarySequence_1_1<'a> {
}
}

#[derive(Debug, Copy, Clone)]
pub struct RawBinarySequenceIterator_1_1<'top> {
source: ImmutableBuffer<'top>,
bytes_to_skip: usize,
delimited_contents: DelimitedContents<'top>,
delimited_iter: Option<std::slice::Iter<'top, LazyRawValueExpr<'top, BinaryEncoding_1_1>>>,
delimited_expr_cache: Option<&'top [LazyRawValueExpr<'top, BinaryEncoding_1_1>]>,
expr_cache_index: usize,
}

impl<'top> RawBinarySequenceIterator_1_1<'top> {
pub(crate) fn new(
input: ImmutableBuffer<'top>,
delimited_contents: DelimitedContents<'top>,
delimited_expr_cache: Option<&'top [LazyRawValueExpr<'top, BinaryEncoding_1_1>]>,
) -> RawBinarySequenceIterator_1_1<'top> {
RawBinarySequenceIterator_1_1 {
source: input,
bytes_to_skip: 0,
delimited_contents,
delimited_iter: match &delimited_contents {
DelimitedContents::Values(vals) => Some(vals.iter()),
_ => None,
},
delimited_expr_cache,
expr_cache_index: 0,
}
}
}
Expand All @@ -167,14 +188,17 @@ impl<'top> Iterator for RawBinarySequenceIterator_1_1<'top> {
type Item = IonResult<LazyRawValueExpr<'top, BinaryEncoding_1_1>>;

fn next(&mut self) -> Option<Self::Item> {
if let Some(ref mut inner_iter) = &mut self.delimited_iter {
inner_iter.next().map(|val| Ok(*val))
if let Some(expr_cache) = self.delimited_expr_cache {
let expr = expr_cache.get(self.expr_cache_index)?;
self.expr_cache_index += 1;
Some(Ok(*expr))
} else {
self.source = self.source.consume(self.bytes_to_skip);
let (maybe_item, remaining_input) = try_or_some_err!(self.source.read_sequence_value_expr());
let (maybe_item, remaining_input) =
try_or_some_err!(self.source.read_sequence_value_expr());
if let Some(item) = maybe_item {
self.source = remaining_input;
return Some(Ok(item))
return Some(Ok(item));
}
None
}
Expand Down
Loading

0 comments on commit d4ad0c5

Please sign in to comment.