From 963dfc1acb10c406888433329570b0b8fae4d2e4 Mon Sep 17 00:00:00 2001 From: Pi Lanningham Date: Wed, 10 Jan 2024 18:04:18 -0500 Subject: [PATCH 1/3] Resolve SSW-303 Replaces our recursive implementation of do_2_exp with a heavily optimized math.pow2 from the aiken standard library. This version of the module didn't exist at the time, but uses a really clever trick to optimize. For the first 8 powers of 2, it uses a lookup into a bytestring to find the result, and only recurses in big leaps above 2^8. In our benchmarks, this brings the number of orders per batch from 32 to 35!! --- lib/calculation/shared.ak | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/lib/calculation/shared.ak b/lib/calculation/shared.ak index dcc7abc..73cd25c 100644 --- a/lib/calculation/shared.ak +++ b/lib/calculation/shared.ak @@ -1,6 +1,7 @@ //// Shared types and functions across all pool calculations use aiken/builtin +use aiken/math use shared.{SingletonValue} /// An interim pool state @@ -65,7 +66,7 @@ fn unsafe_fast_index_with_tail(inputs: List, idx: Int) -> List { pub fn check_and_set_unique(uniqueness_flags: Int, index: Int) -> Int { expect index >= 0 - let bit = do_2_exp(index) + let bit = math.pow2(index) let bit_shifted = 2 * bit let flag_set = uniqueness_flags + bit @@ -73,11 +74,3 @@ pub fn check_and_set_unique(uniqueness_flags: Int, index: Int) -> Int { expect flag_set % bit_shifted > uniqueness_flags % bit_shifted flag_set } - -pub fn do_2_exp(n: Int) -> Int { - if n <= 0 { - 1 - } else { - 2 * do_2_exp(n - 1) - } -} From dd08e96ac227a6b7b4e41151d5e40f93fcbab572 Mon Sep 17 00:00:00 2001 From: Pi Lanningham Date: Mon, 15 Jan 2024 16:00:03 -0500 Subject: [PATCH 2/3] Use an even more hand-optimized version of pow2 TxPipe did some benchmarking and found that unrolling the loop for our typical order sizes helps even more --- lib/calculation/shared.ak | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git 
a/lib/calculation/shared.ak b/lib/calculation/shared.ak index 73cd25c..943759e 100644 --- a/lib/calculation/shared.ak +++ b/lib/calculation/shared.ak @@ -66,7 +66,7 @@ fn unsafe_fast_index_with_tail(inputs: List, idx: Int) -> List { pub fn check_and_set_unique(uniqueness_flags: Int, index: Int) -> Int { expect index >= 0 - let bit = math.pow2(index) + let bit = small_pow2(index) let bit_shifted = 2 * bit let flag_set = uniqueness_flags + bit @@ -74,3 +74,32 @@ pub fn check_and_set_unique(uniqueness_flags: Int, index: Int) -> Int { expect flag_set % bit_shifted > uniqueness_flags % bit_shifted flag_set } + +/// This is a version of pow2 that's optimized for small batch sizes +/// It performs a few more granular loop-unrolls, converging on the small lookup index faster +/// This was presented by TxPipe, and squeezes out one extra escrow over math.pow2 for our typical order sizes +pub fn small_pow2(index) -> Int { + let a = #[1, 2, 4, 8, 16, 32, 64, 128] + if e < 8 { + builtin.index_bytearray(a, e) + } else if e < 16 { + // 2^8 * recurse + 256 * builtin.index_bytearray(a, e - 8) + } else if e < 24 { + // 2^16 * recurse + 65536 * builtin.index_bytearray(a, e - 16) + } else if e < 32 { + // 2^24 * recurse + 16777216 * builtin.index_bytearray(a, e - 24) + } else if e < 40 { + // 2^32 * recurse + 4294967296 * builtin.index_bytearray(a, e - 32) + } else { + // Otherwise we can fall back to the built in; + // currently we can't fit more than 40 orders in a batch, but if + // the protocol parameters get bumped, we don't want things to start failing! + // When benchmarking, falling back to the builtin proved to be faster than recursing + // unsure why that is! 
+ math.pow2(e) + } +} \ No newline at end of file From 81c2d68af561c18f4e5e1f4ba3a00650776f1306 Mon Sep 17 00:00:00 2001 From: Pi Lanningham Date: Mon, 15 Jan 2024 16:06:02 -0500 Subject: [PATCH 3/3] Fix variable names That's what I get for not running the tests lol --- lib/calculation/shared.ak | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/lib/calculation/shared.ak b/lib/calculation/shared.ak index 943759e..3e95316 100644 --- a/lib/calculation/shared.ak +++ b/lib/calculation/shared.ak @@ -78,28 +78,28 @@ pub fn check_and_set_unique(uniqueness_flags: Int, index: Int) -> Int { /// This is a version of pow2 that's optimized for small batch sizes /// It performs a few more granular loop-unrolls, converging on the small lookup index faster /// This was presented by TxPipe, and squeezes out one extra escrow over math.pow2 for our typical order sizes -pub fn small_pow2(index) -> Int { - let a = #[1, 2, 4, 8, 16, 32, 64, 128] - if e < 8 { - builtin.index_bytearray(a, e) - } else if e < 16 { - // 2^8 * recurse - 256 * builtin.index_bytearray(a, e - 8) - } else if e < 24 { - // 2^16 * recurse - 65536 * builtin.index_bytearray(a, e - 16) - } else if e < 32 { +pub fn small_pow2(exponent: Int) -> Int { + let single_byte_powers = #[1, 2, 4, 8, 16, 32, 64, 128] + if exponent < 8 { + builtin.index_bytearray(single_byte_powers, exponent) + } else if exponent < 16 { + // 2^8 * table lookup + 256 * builtin.index_bytearray(single_byte_powers, exponent - 8) + } else if exponent < 24 { + // 2^16 * table lookup + 65536 * builtin.index_bytearray(single_byte_powers, exponent - 16) + } else if exponent < 32 { // 2^24 * recurse - 16777216 * builtin.index_bytearray(a, e - 24) - } else if e < 40 { + 16777216 * builtin.index_bytearray(single_byte_powers, exponent - 24) + } else if exponent < 40 { // 2^32 * recurse - 4294967296 * builtin.index_bytearray(a, e - 32) + 4294967296 * builtin.index_bytearray(single_byte_powers, exponent - 32) } 
else { // Otherwise we can fall back to the built in; // currently we can't fit more than 40 orders in a batch, but if // the protocol parameters get bumped, we don't want things to start failing! // When benchmarking, falling back to the builtin proved to be faster than recursing // unsure why that is! - math.pow2(e) + math.pow2(exponent) } } \ No newline at end of file