-
Notifications
You must be signed in to change notification settings - Fork 47
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Non-Human-Readable Deserialization for Felt Using Serde #76
base: main
Are you sure you want to change the base?
Changes from 6 commits
fd0e062
850ee6e
d74fc67
6723646
d25da84
7b98204
fd2299f
5302ed4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -913,6 +913,8 @@ mod serde_impl { | |
|
||
use super::*; | ||
|
||
const COMPRESSED: u8 = 0b1000_0000; | ||
|
||
impl Serialize for Felt { | ||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> | ||
where | ||
|
@@ -921,11 +923,23 @@ mod serde_impl { | |
if serializer.is_human_readable() { | ||
serializer.serialize_str(&format!("{:#x}", self)) | ||
} else { | ||
let mut seq = serializer.serialize_seq(Some(32))?; | ||
for b in self.to_bytes_be() { | ||
seq.serialize_element(&b)?; | ||
let bytes = self.to_bytes_be(); | ||
let first_significant_byte = bytes.iter().position(|&b| b != 0).unwrap_or(31); | ||
if first_significant_byte > 1 { | ||
let len = 32 - first_significant_byte; | ||
let mut seq = serializer.serialize_seq(Some(len + 1))?; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Because we are already storing the size. We should call There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It makes our worst case 33 bytes and our best 3 bytes (value, our len, their len). We want 32 and 2. |
||
seq.serialize_element(&(len as u8 | COMPRESSED))?; | ||
for b in &bytes[first_significant_byte..] { | ||
seq.serialize_element(b)?; | ||
} | ||
seq.end() | ||
} else { | ||
let mut seq = serializer.serialize_seq(Some(32))?; | ||
for b in &bytes { | ||
seq.serialize_element(b)?; | ||
} | ||
seq.end() | ||
} | ||
seq.end() | ||
} | ||
} | ||
} | ||
|
@@ -935,7 +949,11 @@ mod serde_impl { | |
where | ||
D: ::serde::Deserializer<'de>, | ||
{ | ||
deserializer.deserialize_str(FeltVisitor) | ||
if deserializer.is_human_readable() { | ||
deserializer.deserialize_str(FeltVisitor) | ||
} else { | ||
deserializer.deserialize_seq(FeltVisitor) | ||
} | ||
} | ||
} | ||
|
||
|
@@ -960,6 +978,36 @@ mod serde_impl { | |
.ok_or(String::from("Expected hex string to be prefixed by '0x'")) | ||
.map_err(de::Error::custom) | ||
} | ||
|
||
fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error> | ||
where | ||
A: de::SeqAccess<'de>, | ||
{ | ||
let mut bytes = [0u8; 32]; | ||
let first = seq | ||
.next_element::<u8>()? | ||
.ok_or(de::Error::invalid_length(0, &"more bytes"))?; | ||
if first & COMPRESSED != 0 { | ||
let len = first & !COMPRESSED; | ||
for (i, byte) in bytes.iter_mut().skip(32 - len as usize).enumerate() { | ||
if let Some(b) = seq.next_element()? { | ||
*byte = b; | ||
} else { | ||
return Err(de::Error::invalid_length(i + 1, &"more bytes")); | ||
} | ||
} | ||
} else { | ||
bytes[0] = first; | ||
for byte in bytes.iter_mut().skip(1) { | ||
if let Some(b) = seq.next_element()? { | ||
*byte = b; | ||
} else { | ||
return Err(de::Error::invalid_length(0, &"32 bytes")); | ||
} | ||
} | ||
} | ||
Ok(Felt::from_bytes_be(&bytes)) | ||
} | ||
} | ||
} | ||
|
||
|
@@ -1088,7 +1136,9 @@ mod test { | |
use proptest::prelude::*; | ||
use regex::Regex; | ||
#[cfg(feature = "serde")] | ||
use serde_test::{assert_de_tokens, assert_ser_tokens, Configure, Token}; | ||
use serde_test::{ | ||
assert_de_tokens, assert_de_tokens_error, assert_ser_tokens, Compact, Configure, Token, | ||
}; | ||
|
||
#[test] | ||
fn test_debug_format() { | ||
|
@@ -1622,15 +1672,60 @@ mod test { | |
#[test] | ||
#[cfg(feature = "serde")] | ||
fn deserialize() { | ||
assert_de_tokens(&Felt::ZERO, &[Token::String("0x0")]); | ||
assert_de_tokens(&Felt::TWO, &[Token::String("0x2")]); | ||
assert_de_tokens(&Felt::THREE, &[Token::String("0x3")]); | ||
assert_de_tokens(&Felt::ZERO.readable(), &[Token::String("0x0")]); | ||
assert_de_tokens(&Felt::TWO.readable(), &[Token::String("0x2")]); | ||
assert_de_tokens(&Felt::THREE.readable(), &[Token::String("0x3")]); | ||
assert_de_tokens( | ||
&Felt::MAX, | ||
&Felt::MAX.readable(), | ||
&[Token::String( | ||
"0x800000000000011000000000000000000000000000000000000000000000000", | ||
)], | ||
); | ||
|
||
assert_de_tokens( | ||
&Felt::ZERO.compact(), | ||
&[ | ||
Token::Seq { len: Some(2) }, | ||
Token::U8(1 | 0x80), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we really need this first element containing len and the compressed flag?
And know that because we are using big endian repr, missing bytes are There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Serialization can be: let be_bytes = self.to_bytes_be();
let mut i = match be_bytes.iter().find(|b| b != 0) {
Some(i) => i,
None => { // handle Felt::ZERO and return }
}
let mut seq = serializer.serialize_seq(Some(32 - i))?;
while i < 32 {
seq.serialize_element(be_bytes[i])?;
i += 1;
}
seq.end(); And deserialization: let mut buffer = [0; 32];
let mut i = 32;
while i > 0 {
if let Some(b) = seq.next_element()? {
buffer[i-1] = b;
} else {
break;
}
i -= 1;
}
if i == 0 && seq.next_element()?.is_some() {
// Return error invalid len
} There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why wouldn't this way simpler impl work? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. But isn't what we are already doing with And how exactly could it fail? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The idea here we had with @jbcaron was to make a binary format that never exceeds 32 bytes in the worse case (abitrary hash), and compresses it down to a few bytes in the best cases (small felt) To do that, we think of the input as a byte stream and we use the felt unused bits in the first byte to encode a flag telling us if the felt is compressed or not When the felt is compressed we treat that first byte as the length, so we read However, we found out that there is an issue: using serde Taking inspiration from fixed-sized arrays implementation in serde, they are serialized using That gave me an idea though - which I tried here https://gist.github.com/cchudant/9bbb025c60a9fe23ca56b895fdcff13c @tdelabro what do you think of that? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Okay, I understand better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still, why do you need the flag for? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You need the byte that contain flag+len only when you serialize [Felt]. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So my recomandation would be:
This way values that are a single felt will always be between 1 and 32 bytes.
This makes you spare one byte in the case of felts that are 32 bytes long and nothing for all the others. Not gonna lie, it's not a huge optimization but may still be worth for some people. |
||
Token::U8(0), | ||
Token::SeqEnd, | ||
], | ||
); | ||
assert_de_tokens( | ||
&Felt::ONE.compact(), | ||
&[ | ||
Token::Seq { len: Some(2) }, | ||
Token::U8(1 | 0x80), | ||
Token::U8(1), | ||
Token::SeqEnd, | ||
], | ||
); | ||
assert_de_tokens( | ||
&Felt::TWO.compact(), | ||
&[ | ||
Token::Seq { len: Some(2) }, | ||
Token::U8(1 | 0x80), | ||
Token::U8(2), | ||
Token::SeqEnd, | ||
], | ||
); | ||
assert_de_tokens( | ||
&Felt::THREE.compact(), | ||
&[ | ||
Token::Seq { len: Some(2) }, | ||
Token::U8(1 | 0x80), | ||
Token::U8(3), | ||
Token::SeqEnd, | ||
], | ||
); | ||
assert_de_tokens_error::<Compact<Felt>>( | ||
&[ | ||
Token::Seq { len: Some(1) }, | ||
Token::U8(1 | 0x80), | ||
Token::SeqEnd, | ||
], | ||
"invalid length 1, expected more bytes", | ||
); | ||
} | ||
|
||
#[test] | ||
|
@@ -1649,116 +1744,35 @@ mod test { | |
assert_ser_tokens( | ||
&Felt::ZERO.compact(), | ||
&[ | ||
Token::Seq { len: Some(32) }, | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::Seq { len: Some(2) }, | ||
Token::U8(1 | 0x80), | ||
Token::U8(0), | ||
Token::SeqEnd, | ||
], | ||
); | ||
assert_ser_tokens( | ||
&Felt::ONE.compact(), | ||
&[ | ||
Token::Seq { len: Some(2) }, | ||
Token::U8(1 | 0x80), | ||
Token::U8(1), | ||
Token::SeqEnd, | ||
], | ||
); | ||
assert_ser_tokens( | ||
&Felt::TWO.compact(), | ||
&[ | ||
Token::Seq { len: Some(32) }, | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::Seq { len: Some(2) }, | ||
Token::U8(1 | 0x80), | ||
Token::U8(2), | ||
Token::SeqEnd, | ||
], | ||
); | ||
assert_ser_tokens( | ||
&Felt::THREE.compact(), | ||
&[ | ||
Token::Seq { len: Some(32) }, | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::U8(0), | ||
Token::Seq { len: Some(2) }, | ||
Token::U8(1 | 0x80), | ||
Token::U8(3), | ||
Token::SeqEnd, | ||
], | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can avoid the if else nesting of
if first_significant_byte > 1
doing the following: