From 4e73e9280488bf21102d83e04f4c23f82d405c26 Mon Sep 17 00:00:00 2001 From: Michael Zaikin Date: Thu, 5 Dec 2024 15:32:35 +0000 Subject: [PATCH 1/3] Optimize ByteArray serialization --- packages/consensus/src/codec.cairo | 21 +++- packages/utils/src/bit_shifts.cairo | 23 ++++ packages/utils/src/word_array.cairo | 163 +++++++++++++++++++++++++++- 3 files changed, 200 insertions(+), 7 deletions(-) diff --git a/packages/consensus/src/codec.cairo b/packages/consensus/src/codec.cairo index 7f9d700e..bf41a9a8 100644 --- a/packages/consensus/src/codec.cairo +++ b/packages/consensus/src/codec.cairo @@ -4,6 +4,7 @@ use super::types::transaction::{Transaction, TxIn, TxOut, OutPoint}; use utils::hash::Digest; use utils::word_array::{WordArray, WordArrayTrait, WordSpan, WordSpanTrait}; use core::traits::DivRem; +use core::serde::Serde; pub trait Encode { /// Encodes using Bitcoin codec and appends to the buffer. @@ -31,15 +32,23 @@ pub impl EncodeSpan> of Encode> { } /// `Encode` trait implementation for `ByteArray`. -/// TODO: use WordArray for arguments instead of ByteArray. 
-/// Extra optimization: predict word array offset pub impl EncodeByteArray of Encode { fn encode_to(self: @ByteArray, ref dest: WordArray) { encode_compact_size(self.len(), ref dest); - let num_bytes = self.len(); - for i in 0..num_bytes { - dest.append_u8(self[i]); - } + + // Serialized ByteArray: [num_bytes31_chunks, bytes31_chunks..., last_word, last_word_len] + let mut out: Array = Default::default(); + self.serialize(ref out); + + let mut num_bytes31 = out.pop_front().unwrap(); + while num_bytes31 != 0 { + dest.append_bytes31(out.pop_front().unwrap().into()); + num_bytes31 -= 1; + }; + + let last_word = out.pop_front().unwrap(); + let last_word_len = out.pop_front().unwrap(); + dest.append_bytes(last_word.into(), last_word_len.try_into().unwrap()); } } diff --git a/packages/utils/src/bit_shifts.cairo b/packages/utils/src/bit_shifts.cairo index e6e5d9b3..2273c07f 100644 --- a/packages/utils/src/bit_shifts.cairo +++ b/packages/utils/src/bit_shifts.cairo @@ -90,6 +90,29 @@ pub fn pow2(exponent: u32) -> u64 { *hardcoded_results.span()[exponent] } +pub fn pow256(exponent: u32) -> NonZero { + let hardcoded_results: [u256; 32] = [ + 0x1, 0x100, 0x10000, 0x1000000, 0x100000000, 0x10000000000, 0x1000000000000, + 0x100000000000000, 0x10000000000000000, 0x1000000000000000000, 0x100000000000000000000, + 0x10000000000000000000000, 0x1000000000000000000000000, 0x100000000000000000000000000, + 0x10000000000000000000000000000, 0x1000000000000000000000000000000, + 0x100000000000000000000000000000000, 0x10000000000000000000000000000000000, + 0x1000000000000000000000000000000000000, 0x100000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000, 0x1000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000000, + 
0x10000000000000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000000000000, + 0x10000000000000000000000000000000000000000000000000000000000, + 0x1000000000000000000000000000000000000000000000000000000000000, + 0x100000000000000000000000000000000000000000000000000000000000000, + ]; + (*hardcoded_results.span()[exponent]).try_into().unwrap() +} + #[cfg(test)] mod tests { use super::{fast_pow, pow2, shr_u64}; diff --git a/packages/utils/src/word_array.cairo b/packages/utils/src/word_array.cairo index 033a3191..6c0eddab 100644 --- a/packages/utils/src/word_array.cairo +++ b/packages/utils/src/word_array.cairo @@ -5,6 +5,7 @@ //! SHA256 hash function which operates on 4-byte words. use core::traits::DivRem; +use utils::bit_shifts::pow256; /// Array of 4-byte words where the last word can be partial. #[derive(Drop, Debug, Default, PartialEq)] @@ -179,6 +180,96 @@ pub impl WordArrayImpl of WordArrayTrait { } } + fn append_bytes31(ref self: WordArray, value: u256) { + let bytes28 = if self.last_input_num_bytes == 0 { + let (bytes28, last_word) = DivRem::div_rem(value, 0x1000000); + self.last_input_word = last_word.try_into().expect('append_bytes31/1'); + self.last_input_num_bytes = 3; + bytes28 + } else if self.last_input_num_bytes == 1 { + let (first_word, bytes28) = DivRem::div_rem( + value, 0x100000000000000000000000000000000000000000000000000000000, + ); + self.append_word(first_word.try_into().expect('append_bytes31/2'), 3); + bytes28 + } else if self.last_input_num_bytes == 2 { + let (bytes29, last_word) = DivRem::div_rem(value, 0x100); + let (first_word, bytes28) = DivRem::div_rem( + bytes29, 0x100000000000000000000000000000000000000000000000000000000, + ); + self.append_word(first_word.try_into().expect('append_bytes31/3'), 2); + self.last_input_word = last_word.try_into().expect('4'); + self.last_input_num_bytes = 1; + bytes28 + } else { + let (bytes30, 
last_word) = DivRem::div_rem(value, 0x10000); + let (first_word, bytes28) = DivRem::div_rem( + bytes30, 0x100000000000000000000000000000000000000000000000000000000, + ); + self.append_word(first_word.try_into().expect('append_bytes31/5'), 1); + self.last_input_word = last_word.try_into().expect('append_bytes31/6'); + self.last_input_num_bytes = 2; + bytes28 + }; + + let (q0, r0) = DivRem::div_rem(bytes28, 0x100000000); + let (q1, r1) = DivRem::div_rem(q0, 0x100000000); + let (q2, r2) = DivRem::div_rem(q1, 0x100000000); + let (q3, r3) = DivRem::div_rem(q2, 0x100000000); + let (q4, r4) = DivRem::div_rem(q3, 0x100000000); + let (q5, r5) = DivRem::div_rem(q4, 0x100000000); + self.input.append(q5.try_into().expect('append_bytes31/7')); + self.input.append(r5.try_into().expect('append_bytes31/8')); + self.input.append(r4.try_into().expect('append_bytes31/9')); + self.input.append(r3.try_into().expect('append_bytes31/10')); + self.input.append(r2.try_into().expect('append_bytes31/11')); + self.input.append(r1.try_into().expect('append_bytes31/12')); + self.input.append(r0.try_into().expect('append_bytes31/13')); + } + + fn append_bytes(ref self: WordArray, value: u256, num_bytes: u32) { + let (num_full_words, last_input_num_bytes) = DivRem::div_rem( + self.last_input_num_bytes + num_bytes, 4, + ); + + if num_full_words != 0 { + let (head, last_word) = if last_input_num_bytes == 0 { + (value, 0) + } else { + DivRem::div_rem(value, pow256(last_input_num_bytes)) + }; + + let (mut full_words, mut full_words_num_bytes) = if self.last_input_num_bytes == 0 { + (head, num_bytes - last_input_num_bytes) + } else { + let first_word_num_bytes = 4 - self.last_input_num_bytes; + let full_words_num_bytes = num_bytes - last_input_num_bytes - first_word_num_bytes; + let (first_word, full_words) = DivRem::div_rem(head, pow256(full_words_num_bytes)); + self + .append_word( + first_word.try_into().expect('append_bytes/0'), first_word_num_bytes, + ); + (full_words, full_words_num_bytes) + }; 
+ + if full_words_num_bytes != 0 { + full_words_num_bytes -= 4; + while full_words_num_bytes != 0 { + let (word, r) = DivRem::div_rem(full_words, pow256(full_words_num_bytes)); + self.input.append(word.try_into().expect('append_bytes/1')); + full_words = r; + full_words_num_bytes -= 4; + }; + self.input.append(full_words.try_into().expect('append_bytes/2')); + } + + self.last_input_word = last_word.try_into().expect('append_bytes/3'); + self.last_input_num_bytes = last_input_num_bytes; + } else { + self.append_word(value.try_into().expect('append_bytes/3'), num_bytes); + } + } + /// Split word array into components: /// (array of full 4-byte words, last word, number of bytes in the last word) fn into_components(self: WordArray) -> (Array, u32, u32) { @@ -263,7 +354,7 @@ pub mod hex { #[cfg(test)] mod tests { use super::WordSpanTrait; - use super::hex::words_to_hex; + use super::hex::{words_to_hex}; use super::{WordArray, WordArrayTrait}; #[test] @@ -342,4 +433,74 @@ mod tests { assert_eq!((0, 1), span.pop_back().unwrap()); assert_eq!(Option::None, span.pop_back()); } + + #[test] + fn append_bytes31() { + let mut words: WordArray = Default::default(); + words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031_u256); + assert_eq!( + "01020304050607080910111213141516171819202122232425262728293031", + words_to_hex(words.span()), + ); + + let mut words: WordArray = Default::default(); + words.append_word(0xff, 1); + words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031_u256); + assert_eq!( + "ff01020304050607080910111213141516171819202122232425262728293031", + words_to_hex(words.span()), + ); + + let mut words: WordArray = Default::default(); + words.append_word(0xfffe, 2); + words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031_u256); + assert_eq!( + "fffe01020304050607080910111213141516171819202122232425262728293031", + words_to_hex(words.span()), + ); + + let mut words: 
WordArray = Default::default(); + words.append_word(0xfffefd, 3); + words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031_u256); + assert_eq!( + "fffefd01020304050607080910111213141516171819202122232425262728293031", + words_to_hex(words.span()), + ); + } + + #[test] + fn append_bytes() { + let mut words: WordArray = Default::default(); + words.append_bytes(0x010203040506070809101112131415161718192021222324252627282930_u256, 30); + assert_eq!( + "010203040506070809101112131415161718192021222324252627282930", + words_to_hex(words.span()), + ); + + let mut words: WordArray = Default::default(); + words.append_word(0xff, 1); + words.append_bytes(0x01020304050607080910111213141516171819202122232425262728_u256, 28); + assert_eq!( + "ff01020304050607080910111213141516171819202122232425262728", + words_to_hex(words.span()), + ); + + let mut words: WordArray = Default::default(); + words.append_word(0xfffe, 2); + words.append_bytes(0x010203040506070809101112131415161718192021222324252627_u256, 27); + assert_eq!( + "fffe010203040506070809101112131415161718192021222324252627", + words_to_hex(words.span()), + ); + + let mut words: WordArray = Default::default(); + words.append_word(0xfffefd, 3); + words.append_bytes(0x01, 1); + assert_eq!("fffefd01", words_to_hex(words.span())); + + let mut words: WordArray = Default::default(); + words.append_word(0xfffefd, 3); + words.append_bytes(0x0102, 2); + assert_eq!("fffefd0102", words_to_hex(words.span())); + } } From 037470d500a675c8758497a909481fd6e530f87e Mon Sep 17 00:00:00 2001 From: Michael Zaikin Date: Thu, 5 Dec 2024 22:44:31 +0000 Subject: [PATCH 2/3] reenable ignored tests --- scripts/data/integration_tests.sh | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/scripts/data/integration_tests.sh b/scripts/data/integration_tests.sh index 6f9cd387..944d7779 100755 --- a/scripts/data/integration_tests.sh +++ b/scripts/data/integration_tests.sh @@ -30,25 +30,6 @@ for arg in "$@"; 
do done ignored_files=( - "tests/data/full_209999.json", #cairo-run dies, to be investigated - "tests/data/full_403199.json", #cairo-run dies, to be investigated - "tests/data/full_481823.json", #cairo-run dies, to be investigated - "tests/data/full_489888.json", #cairo-run dies, to be investigated - "tests/data/full_491406.json", #cairo-run dies, to be investigated - "tests/data/full_629999.json", #cairo-run dies, to be investigated - "tests/data/full_709631.json", #cairo-run dies, to be investigated - "tests/data/full_774627.json", # Couldn't compute operand op1. Unknown value for memory cell 1:131082 - "tests/data/full_839999.json", # Couldn't compute operand op1. Unknown value for memory cell 1:262154 - "tests/data/full_116927.json", # Run panicked with [108217864776563 ('blocks'), ]. - "tests/data/full_150012.json", # Run panicked with [108217864776563 ('blocks'), ]. - "tests/data/full_2015.json", # Run panicked with [108217864776563 ('blocks'), ]. - "tests/data/full_24834.json", # Run panicked with [108217864776563 ('blocks'), ]. - "tests/data/full_32255.json", # Run panicked with [108217864776563 ('blocks'), ]. - "tests/data/full_478557.json", # Run panicked with [108217864776563 ('blocks'), ]. - "tests/data/full_57042.json", # Run panicked with [108217864776563 ('blocks'), ]. - "tests/data/full_72575.json", # Run panicked with [108217864776563 ('blocks'), ]. - "tests/data/full_757752.json", # Run panicked with [108217864776563 ('blocks'), ]. 
- # "tests/data/full_478557.json", #runs on server ) ignore_file="tests/data/ignore" From 62ef1ec3087ef518c9e127bbc0037c6abd68fcad Mon Sep 17 00:00:00 2001 From: Michael Zaikin Date: Fri, 6 Dec 2024 11:07:53 +0000 Subject: [PATCH 3/3] Add docstrings --- packages/consensus/src/codec.cairo | 9 +++++--- packages/utils/src/word_array.cairo | 36 ++++++++++++++++------------- 2 files changed, 26 insertions(+), 19 deletions(-) diff --git a/packages/consensus/src/codec.cairo b/packages/consensus/src/codec.cairo index bf41a9a8..36b0dac3 100644 --- a/packages/consensus/src/codec.cairo +++ b/packages/consensus/src/codec.cairo @@ -36,19 +36,22 @@ pub impl EncodeByteArray of Encode { fn encode_to(self: @ByteArray, ref dest: WordArray) { encode_compact_size(self.len(), ref dest); - // Serialized ByteArray: [num_bytes31_chunks, bytes31_chunks..., last_word, last_word_len] + // We cannot get access to ByteArray internals, but we can use Serde to convert + // the inner triple (Array, last_word, last_word_len) into an array of felts. + // WARNING: these assumptions might be broken if the ByteArray implementation changes + // in corelib. A better approach would be to introduce IntoIterator for ByteArray. 
let mut out: Array = Default::default(); self.serialize(ref out); let mut num_bytes31 = out.pop_front().unwrap(); while num_bytes31 != 0 { - dest.append_bytes31(out.pop_front().unwrap().into()); + dest.append_bytes31(out.pop_front().unwrap()); num_bytes31 -= 1; }; let last_word = out.pop_front().unwrap(); let last_word_len = out.pop_front().unwrap(); - dest.append_bytes(last_word.into(), last_word_len.try_into().unwrap()); + dest.append_bytes(last_word, last_word_len.try_into().unwrap()); } } diff --git a/packages/utils/src/word_array.cairo b/packages/utils/src/word_array.cairo index 6c0eddab..f2d23125 100644 --- a/packages/utils/src/word_array.cairo +++ b/packages/utils/src/word_array.cairo @@ -180,20 +180,22 @@ pub impl WordArrayImpl of WordArrayTrait { } } - fn append_bytes31(ref self: WordArray, value: u256) { + /// Append 31 bytes (the maximum number of full bytes that a single field element can store). + fn append_bytes31(ref self: WordArray, value: felt252) { + let bytes31: u256 = value.into(); // DivRem is not implemented for felt252 let bytes28 = if self.last_input_num_bytes == 0 { - let (bytes28, last_word) = DivRem::div_rem(value, 0x1000000); + let (bytes28, last_word) = DivRem::div_rem(bytes31, 0x1000000); self.last_input_word = last_word.try_into().expect('append_bytes31/1'); self.last_input_num_bytes = 3; bytes28 } else if self.last_input_num_bytes == 1 { let (first_word, bytes28) = DivRem::div_rem( - value, 0x100000000000000000000000000000000000000000000000000000000, + bytes31, 0x100000000000000000000000000000000000000000000000000000000, ); self.append_word(first_word.try_into().expect('append_bytes31/2'), 3); bytes28 } else if self.last_input_num_bytes == 2 { - let (bytes29, last_word) = DivRem::div_rem(value, 0x100); + let (bytes29, last_word) = DivRem::div_rem(bytes31, 0x100); let (first_word, bytes28) = DivRem::div_rem( bytes29, 0x100000000000000000000000000000000000000000000000000000000, ); @@ -202,7 +204,7 @@ pub impl WordArrayImpl of WordArrayTrait { 
self.last_input_num_bytes = 1; bytes28 } else { - let (bytes30, last_word) = DivRem::div_rem(value, 0x10000); + let (bytes30, last_word) = DivRem::div_rem(bytes31, 0x10000); let (first_word, bytes28) = DivRem::div_rem( bytes30, 0x100000000000000000000000000000000000000000000000000000000, ); @@ -227,16 +229,18 @@ pub impl WordArrayImpl of WordArrayTrait { self.input.append(r0.try_into().expect('append_bytes31/13')); } - fn append_bytes(ref self: WordArray, value: u256, num_bytes: u32) { + /// Append up to 31 bytes (byte length provided), packed in a single field element. + fn append_bytes(ref self: WordArray, value: felt252, num_bytes: u32) { + let bytes: u256 = value.into(); // DivRem is not implemented for felt252 let (num_full_words, last_input_num_bytes) = DivRem::div_rem( self.last_input_num_bytes + num_bytes, 4, ); if num_full_words != 0 { let (head, last_word) = if last_input_num_bytes == 0 { - (value, 0) + (bytes, 0) } else { - DivRem::div_rem(value, pow256(last_input_num_bytes)) + DivRem::div_rem(bytes, pow256(last_input_num_bytes)) }; let (mut full_words, mut full_words_num_bytes) = if self.last_input_num_bytes == 0 { @@ -266,7 +270,7 @@ pub impl WordArrayImpl of WordArrayTrait { self.last_input_word = last_word.try_into().expect('append_bytes/3'); self.last_input_num_bytes = last_input_num_bytes; } else { - self.append_word(value.try_into().expect('append_bytes/3'), num_bytes); + self.append_word(bytes.try_into().expect('append_bytes/3'), num_bytes); } } @@ -437,7 +441,7 @@ mod tests { #[test] fn append_bytes31() { let mut words: WordArray = Default::default(); - words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031_u256); + words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031); assert_eq!( "01020304050607080910111213141516171819202122232425262728293031", words_to_hex(words.span()), @@ -445,7 +449,7 @@ mod tests { let mut words: WordArray = Default::default(); words.append_word(0xff, 1); - 
words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031_u256); + words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031); assert_eq!( "ff01020304050607080910111213141516171819202122232425262728293031", words_to_hex(words.span()), @@ -453,7 +457,7 @@ mod tests { let mut words: WordArray = Default::default(); words.append_word(0xfffe, 2); - words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031_u256); + words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031); assert_eq!( "fffe01020304050607080910111213141516171819202122232425262728293031", words_to_hex(words.span()), @@ -461,7 +465,7 @@ mod tests { let mut words: WordArray = Default::default(); words.append_word(0xfffefd, 3); - words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031_u256); + words.append_bytes31(0x01020304050607080910111213141516171819202122232425262728293031); assert_eq!( "fffefd01020304050607080910111213141516171819202122232425262728293031", words_to_hex(words.span()), @@ -471,7 +475,7 @@ mod tests { #[test] fn append_bytes() { let mut words: WordArray = Default::default(); - words.append_bytes(0x010203040506070809101112131415161718192021222324252627282930_u256, 30); + words.append_bytes(0x010203040506070809101112131415161718192021222324252627282930, 30); assert_eq!( "010203040506070809101112131415161718192021222324252627282930", words_to_hex(words.span()), @@ -479,7 +483,7 @@ mod tests { let mut words: WordArray = Default::default(); words.append_word(0xff, 1); - words.append_bytes(0x01020304050607080910111213141516171819202122232425262728_u256, 28); + words.append_bytes(0x01020304050607080910111213141516171819202122232425262728, 28); assert_eq!( "ff01020304050607080910111213141516171819202122232425262728", words_to_hex(words.span()), @@ -487,7 +491,7 @@ mod tests { let mut words: WordArray = Default::default(); words.append_word(0xfffe, 2); - 
words.append_bytes(0x010203040506070809101112131415161718192021222324252627_u256, 27); + words.append_bytes(0x010203040506070809101112131415161718192021222324252627, 27); assert_eq!( "fffe010203040506070809101112131415161718192021222324252627", words_to_hex(words.span()),