Skip to content

Commit

Permalink
Merge pull request #327 from brendanzab/formats/deref-format-2
Browse files Browse the repository at this point in the history
Implement deref formats
  • Loading branch information
brendanzab authored Mar 30, 2022
2 parents 7e3812e + 807d7c9 commit 7641402
Show file tree
Hide file tree
Showing 11 changed files with 156 additions and 45 deletions.
50 changes: 33 additions & 17 deletions doc/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ elaboration, and core language is forthcoming.
- [Overlap formats](#overlap-formats)
- [Number formats](#number-formats)
- [Array formats](#array-formats)
- [Link formats](#link-formats)
- [Stream position formats](#stream-position-formats)
- [Link formats](#link-formats)
- [Deref formats](#deref-formats)
- [Succeed format](#succeed-format)
- [Fail format](#fail-format)
- [Functions](#functions)
Expand Down Expand Up @@ -348,6 +349,19 @@ of the host array types.
| `array32 len format` | `Array32 len (Repr format)` |
| `array64 len format` | `Array64 len (Repr format)` |

### Stream position formats

The stream position format is interpreted as the current stream position during
parsing:

- `stream_pos : Format`

#### Representation of stream position formats

| format | `Repr` format |
| ------------ | ------------- |
| `stream_pos` | `Pos` |

### Link formats

Link formats allow for references to other parts of a binary stream to be
Expand All @@ -361,22 +375,25 @@ to expect at that position:
Link formats are [represented](#format-representations) as typed
[references](#references) to other parts of the binary stream.

| format | `Repr` format |
| -------------------- | --------------------------- |
| `link pos format` | `Ref (Repr format)` |
| format | `Repr` format |
| ----------------- | ------------- |
| `link pos format` | `Ref format` |

### Stream position formats
### Deref formats

The stream position format is interpreted as the current stream position during
parsing:
Deref formats allow [references](#references) to other parts of the stream to be
included in the resulting parsed output.

- `stream_pos : Format`
- `deref : fun (f : Format) -> Ref f -> Format`

#### Representation of stream position formats
#### Representation of deref formats

| format | `Repr` format |
| ------------ | ------------- |
| `stream_pos` | `Pos` |
Dereferences are [represented](#format-representations) after parsing using the
representation of the referenced format.

| format | `Repr` format |
| ------------------ | ------------- |
| `deref format ref` | `Repr format` |

### Succeed format

Expand Down Expand Up @@ -687,13 +704,12 @@ A number of operations are defined for positions:

## References

References are like [stream positions](#positions), only they also have an
expected type given as well:
References to other parts of the binary file are described with:

- `Ref : Type -> Type`
- `Ref : Format -> Type`

References are usually encountered as a result of parsing a [link
format](#link-formats).
References are usually encountered as a result of parsing a [link format](#link-formats),
and can be dereferenced later on with a [deref format](#deref-formats).

## Void

Expand Down
2 changes: 2 additions & 0 deletions fathom/src/core.rs
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,8 @@ def_prims! {
/// A format that links to another location in the binary data stream,
/// relative to a base position.
FormatLink => "link",
/// A format that forces a reference to be read eagerly.
FormatDeref => "deref",
/// Format representations.
FormatRepr => "Repr",

Expand Down
100 changes: 79 additions & 21 deletions fathom/src/core/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::env::{EnvLen, SliceEnv};
pub struct Context<'arena, 'env> {
flexible_exprs: &'env SliceEnv<Option<ArcValue<'arena>>>,
pending_formats: Vec<(u64, ArcValue<'arena>)>,
cached_refs: HashMap<u64, Vec<ParsedRef<'arena>>>,
}

pub struct ParsedRef<'arena> {
Expand All @@ -27,6 +28,7 @@ impl<'arena, 'env> Context<'arena, 'env> {
Context {
flexible_exprs,
pending_formats: Vec::new(),
cached_refs: HashMap::new(),
}
}

Expand All @@ -40,36 +42,18 @@ impl<'arena, 'env> Context<'arena, 'env> {

// TODO: allow refs to be streamed
pub fn read_entrypoint(
&mut self,
mut self,
reader: &mut dyn SeekRead,
format: ArcValue<'arena>,
) -> io::Result<HashMap<u64, Vec<ParsedRef<'arena>>>> {
let initial_pos = reader.stream_position()?;
let mut refs = HashMap::<_, Vec<ParsedRef<'_>>>::new();

// Parse the entrypoint from the start of the binary data
self.pending_formats.push((0, format));

while let Some((pos, format)) = self.pending_formats.pop() {
let parsed_refs = refs.entry(pos).or_insert(Vec::with_capacity(1));

if (parsed_refs.iter())
.find(|r| self.conversion_context().is_equal(&r.format, &format))
.is_none()
{
// Seek to current current ref location
reader.seek(SeekFrom::Start(pos))?;
// Parse the data at that location
let expr = self.read_format(reader, &format)?;
// Record the data in the refs at this position
parsed_refs.push(ParsedRef { format, expr });
}
self.read_cached_ref(reader, pos, &format)?;
}

// Reset reader back to the start
reader.seek(SeekFrom::Start(initial_pos))?;

Ok(refs)
Ok(self.cached_refs)
}

fn read_format(
Expand Down Expand Up @@ -166,6 +150,7 @@ impl<'arena, 'env> Context<'arena, 'env> {
(Prim::FormatArray32, [Fun(len), Fun(elem_format)]) => self.read_array(reader, len, elem_format),
(Prim::FormatArray64, [Fun(len), Fun(elem_format)]) => self.read_array(reader, len, elem_format),
(Prim::FormatLink, [Fun(pos), Fun(elem_format)]) => self.read_link(pos, elem_format),
(Prim::FormatDeref, [Fun(elem_format), Fun(r#ref)]) => self.read_deref(reader, elem_format, r#ref),
(Prim::FormatStreamPos, []) => read_stream_pos(reader),
(Prim::FormatFail, []) => Err(io::Error::new(io::ErrorKind::Other, "parse failure")),
_ => Err(io::Error::new(io::ErrorKind::Other, "invalid format")),
Expand Down Expand Up @@ -208,6 +193,79 @@ impl<'arena, 'env> Context<'arena, 'env> {

Ok(Arc::new(Value::Const(Const::Ref(pos))))
}

/// Read a `deref` format: resolve `r#ref` to a stream position and return
/// the data parsed with `format` at that position.
///
/// The reference is forced through the elimination context first. If the
/// forced value is not a `Const::Ref`, an `io::ErrorKind::Other` error with
/// the message "invalid format reference" is returned. Otherwise the work is
/// delegated to `read_cached_ref`.
fn read_deref(
    &mut self,
    reader: &mut dyn SeekRead,
    format: &ArcValue<'arena>,
    r#ref: &ArcValue<'arena>,
) -> io::Result<ArcValue<'arena>> {
    // Copy the position out of the forced value so that the borrow of
    // `self` ends before `read_cached_ref` needs `&mut self`.
    let target = match self.elim_context().force(r#ref).as_ref() {
        Value::Const(Const::Ref(pos)) => Some(*pos),
        _ => None,
    };

    let pos = target
        .ok_or_else(|| io::Error::new(io::ErrorKind::Other, "invalid format reference"))?;

    self.read_cached_ref(reader, pos, format)
}

/// Find the cached parse result for `format` at stream position `pos`.
///
/// Returns `None` when nothing has been cached at `pos`, or when none of the
/// entries cached there has a format equal to `format` according to the
/// conversion context.
fn lookup_cached_ref<'context>(
    &'context self,
    pos: u64,
    format: &ArcValue<'_>,
) -> Option<&'context ParsedRef<'arena>> {
    // NOTE: Every candidate at this position costs one call to
    // `semantics::ConversionContext::is_equal`. Should that ever become a
    // bottleneck, `read_link` could pre-allocate a `ParsedRef` slot in the
    // cache and store its index next to the position in `Const::Ref`, which
    // would turn this search into a direct lookup.
    for parsed_ref in self.cached_refs.get(&pos)? {
        if self.conversion_context().is_equal(&parsed_ref.format, format) {
            return Some(parsed_ref);
        }
    }

    None
}

/// Parse `format` at stream position `pos`, reusing a cached result if one
/// exists for the same position and an equal format.
///
/// On a cache miss the reader is moved to `pos`, the format is parsed, the
/// reader is moved back to where it was, and the result is stored in
/// `cached_refs` before being returned.
///
/// # Errors
///
/// Propagates any I/O error from querying or seeking the reader, and any
/// error from `read_format`. If parsing fails, the reader is left wherever
/// the failed parse stopped; its original position is only restored on
/// success.
///
/// # Panics
///
/// Panics if an equal `(pos, format)` entry appeared in the cache while the
/// format was being parsed, which would indicate a recursive reference.
fn read_cached_ref(
    &mut self,
    reader: &mut dyn SeekRead,
    pos: u64,
    format: &ArcValue<'arena>,
) -> io::Result<ArcValue<'arena>> {
    if let Some(parsed_ref) = self.lookup_cached_ref(pos, format) {
        return Ok(parsed_ref.expr.clone());
    }

    let initial_pos = reader.stream_position()?;

    // Seek to the location the reference points at
    reader.seek(SeekFrom::Start(pos))?;
    // Parse the data at that location
    let expr = self.read_format(reader, format)?;
    // Reset reader back to the original position
    reader.seek(SeekFrom::Start(initial_pos))?;

    // We might have parsed the current reference during the above call to
    // `read_format`. It's unclear if this could ever happen in practice,
    // especially without succumbing to non-termination, but we'll panic
    // here just in case.
    if self.lookup_cached_ref(pos, format).is_some() {
        panic!("recursion found when storing cached reference {}", pos);
    }

    // Store the parsed reference in the reference cache. The vector is
    // built lazily so that nothing is allocated (and immediately dropped)
    // when this position already has an entry list.
    self.cached_refs
        .entry(pos)
        .or_insert_with(|| Vec::with_capacity(1))
        .push(ParsedRef {
            format: format.clone(),
            expr: expr.clone(),
        });

    Ok(expr)
}
}

pub trait SeekRead: Seek + Read {}
Expand Down
5 changes: 3 additions & 2 deletions fathom/src/core/semantics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ impl<'arena, 'env> ElimContext<'arena, 'env> {
}
Value::Stuck(Head::Prim(prim), spine) => {
match (prim, &spine[..]) {
(Prim::FormatSucceed, [Elim::Fun(r#elem), _]) => r#elem.clone(),
(Prim::FormatSucceed, [Elim::Fun(elem), _]) => elem.clone(),
(Prim::FormatFail, []) => Arc::new(Value::prim(Prim::VoidType, [])),
(Prim::FormatU8, []) => Arc::new(Value::prim(Prim::U8Type, [])),
(Prim::FormatU16Be, []) => Arc::new(Value::prim(Prim::U16Type, [])),
Expand Down Expand Up @@ -656,8 +656,9 @@ impl<'arena, 'env> ElimContext<'arena, 'env> {
Value::prim(Prim::Array64Type, [len.clone(), self.apply_repr(elem)]),
),
(Prim::FormatLink, [Elim::Fun(_), Elim::Fun(elem)]) => {
Arc::new(Value::prim(Prim::RefType, [self.apply_repr(elem)]))
Arc::new(Value::prim(Prim::RefType, [elem.clone()]))
}
(Prim::FormatDeref, [Elim::Fun(elem), Elim::Fun(_)]) => self.apply_repr(elem),
(Prim::FormatStreamPos, []) => Arc::new(Value::prim(Prim::PosType, [])),
(Prim::ReportedError, []) => Arc::new(Value::prim(Prim::ReportedError, [])),
_ => panic_any(Error::InvalidFormatRepr),
Expand Down
14 changes: 13 additions & 1 deletion fathom/src/surface/elaboration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ impl<'arena> RigidEnv<'arena> {
define_prim(PosType, universe());
define_prim(
RefType,
Arc::new(Value::FunType(None, universe(), close(&UNIVERSE))),
Arc::new(Value::FunType(None, format_type(), close(&UNIVERSE))),
);

define_prim(FormatType, universe());
Expand Down Expand Up @@ -153,6 +153,18 @@ impl<'arena> RigidEnv<'arena> {
define_prim(FormatArray32, format_array(U32Type));
define_prim(FormatArray64, format_array(U64Type));
define_prim(FormatLink, binary_op(PosType, FormatType, FormatType));
define_prim(
FormatDeref,
Arc::new(Value::FunType(
name("Elem"),
format_type(),
close(&Term::FunType(
None,
&Term::FunElim(&Term::Prim(RefType), &VAR0),
&FORMAT_TYPE,
)),
)),
);
define_prim(FormatStreamPos, format_type());
define_prim(
FormatRepr,
Expand Down
7 changes: 7 additions & 0 deletions tests/succeed/format-deref/simple.fathom
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
start <- stream_pos,
link <- link start u16be,
len <- deref _ link,
_reserved <- u16be,
data <- array16 len u16be,
}
10 changes: 10 additions & 0 deletions tests/succeed/format-deref/simple.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
stdout = '''
{
start <- stream_pos,
link <- link start u16be,
len <- deref (_ start link) link,
_reserved <- u16be,
data <- array16 len u16be,
} : Format
'''
stderr = ''
3 changes: 2 additions & 1 deletion tests/succeed/format-repr/primitives.fathom
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ let test_array16 : fun n -> fun f -> Repr (array16 n f) -> Array16 n (Repr f) =
let test_array32 : fun n -> fun f -> Repr (array32 n f) -> Array32 n (Repr f) = fun _ => fun _ => fun x => x;
let test_array64 : fun n -> fun f -> Repr (array64 n f) -> Array64 n (Repr f) = fun _ => fun _ => fun x => x;

let test_link : fun pos -> fun f -> Repr (link pos f) -> Ref (Repr f) = fun _ => fun _ => fun x => x;
let test_link : fun pos -> fun f -> Repr (link pos f) -> Ref f = fun _ => fun _ => fun x => x;
let test_deref : fun f -> fun ref -> Repr (deref f ref) -> Repr f = fun _ => fun _ => fun x => x;
let test_stream_pos : Repr stream_pos -> Pos = fun x => x;

Type
6 changes: 4 additions & 2 deletions tests/succeed/format-repr/primitives.snap
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ let test_array32 : fun (n : U32) -> fun (f : Format) -> fun (_ :
Array32 n (Repr f)) -> Array32 n (Repr f) = fun _ => fun _ => fun x => x;
let test_array64 : fun (n : U64) -> fun (f : Format) -> fun (_ :
Array64 n (Repr f)) -> Array64 n (Repr f) = fun _ => fun _ => fun x => x;
let test_link : fun (pos : Pos) -> fun (f : Format) -> fun (_ : Ref (Repr f)) ->
Ref (Repr f) = fun _ => fun _ => fun x => x;
let test_link : fun (pos : Pos) -> fun (f : Format) -> fun (_ : Ref f) ->
Ref f = fun _ => fun _ => fun x => x;
let test_deref : fun (f : Format) -> fun (ref : Ref f) -> fun (_ : Repr f) ->
Repr f = fun _ => fun _ => fun x => x;
let test_stream_pos : fun (_ : Pos) -> Pos = fun x => x;
Type : Type
'''
Expand Down
3 changes: 2 additions & 1 deletion tests/succeed/primitives.fathom
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ let _ = Array16 : U16 -> Type -> Type;
let _ = Array32 : U32 -> Type -> Type;
let _ = Array64 : U64 -> Type -> Type;
let _ = Pos : Type;
let _ = Ref : Type -> Type;
let _ = Ref : Format -> Type;

let _ = 1 : U8;
let _ = 1 : U16;
Expand Down Expand Up @@ -54,6 +54,7 @@ let _ = array16 : U16 -> Format -> Format;
let _ = array32 : U32 -> Format -> Format;
let _ = array64 : U64 -> Format -> Format;
let _ = link : Pos -> Format -> Format;
let _ = deref : fun (f : Format) -> Ref f -> Format;
let _ = stream_pos : Format;
let _ = Repr : Format -> Type;

Expand Down
1 change: 1 addition & 0 deletions tests/succeed/primitives.snap
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ let _ : _ = array16;
let _ : _ = array32;
let _ : _ = array64;
let _ : _ = link;
let _ : _ = deref;
let _ : _ = stream_pos;
let _ : _ = Repr;
let _ : _ = u8_add;
Expand Down

0 comments on commit 7641402

Please sign in to comment.