Sovereign-Labs · dubbelosix · Jan 9, 2024 · Jan 4, 2024 · Jan 4, 2024 · Jan 5, 2024
diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml
@@ -83,11 +83,11 @@ jobs:
       - check-demo-rollup-bash-commands
       - validate-packages-to-publish-yml
     steps:
-    - name: Compute whether the needed jobs succeeded or failed
-      uses: re-actors/alls-green@release/v1
-      with:
-        allowed-skips: deploy-github-pages
-        jobs: ${{ toJSON(needs) }}
+      - name: Compute whether the needed jobs succeeded or failed
+        uses: re-actors/alls-green@release/v1
+        with:
+          allowed-skips: deploy-github-pages
+          jobs: ${{ toJSON(needs) }}
 
   check:
     name: check
@@ -184,6 +184,10 @@ jobs:
             .
             fuzz
       - name: cargo prover bench check
+        env:
+          BLOCKS: 1
+          TXNS_PER_BLOCK: 10
+          NUM_PUB_KEYS: 100
         run: cargo bench --bench prover_bench --features bench
 
   bench_check:
@@ -218,6 +222,10 @@ jobs:
             .
             fuzz
       - name: cargo bench check
+        env:
+          BLOCKS: 1
+          TXNS_PER_BLOCK: 10
+          NUM_PUB_KEYS: 100
         run: cargo bench
 
   # Check that every combination of features is working properly.
@@ -492,7 +500,7 @@ jobs:
         run: cargo run --bin bashtestmd -- --input examples/demo-rollup/README.md --output demo-rollup-readme.sh --tag test-ci
       - run: cat demo-rollup-readme.sh
       - run: chmod +x demo-rollup-readme.sh && ./demo-rollup-readme.sh
-    
+
   validate-packages-to-publish-yml:
     runs-on: ubuntu-latest
     steps:

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -34,7 +34,7 @@ members = [
     "module-system/sov-state",
     "module-system/sov-modules-api",
     "module-system/module-schemas",
-    "module-system/utils/sov-data-generators",
+    "module-system/sov-data-generators",
     "module-system/module-implementations/sov-accounts",
     "module-system/module-implementations/sov-bank",
     "module-system/module-implementations/sov-nft-module",

diff --git a/examples/demo-rollup/Cargo.toml b/examples/demo-rollup/Cargo.toml
@@ -57,6 +57,7 @@ sov-evm = { path = "../../module-system/module-implementations/sov-evm", feature
 sov-bank = { path = "../../module-system/module-implementations/sov-bank", features = ["native"] }
 sov-nft-module = { path = "../../module-system/module-implementations/sov-nft-module", features = ["native"] }
 sov-zk-cycle-macros = { path = "../../utils/zk-cycle-macros" }
+sov-data-generators = { path = "../../module-system/sov-data-generators" }
 humantime = "2.1"
 
 borsh = { workspace = true }

diff --git a/examples/demo-rollup/benches/prover/Makefile b/examples/demo-rollup/benches/prover/Makefile
@@ -0,0 +1,35 @@
+# Prover benchmark driver for the demo-rollup example.
+#
+# Each setting uses ?=, so a value supplied through the environment or on the
+# make command line takes precedence over the default written here.
+BLOCKS ?= 10
+TXNS_PER_BLOCK ?= 100
+NUM_PUB_KEYS ?= 10000
+GENESIS_CONFIG_DIR ?= ../test-data/genesis/benchmark
+
+# Export the settings into the environment of every recipe command
+# (presumably read by the prover_bench binary -- confirm against the bench).
+export BLOCKS
+export TXNS_PER_BLOCK
+export NUM_PUB_KEYS
+export GENESIS_CONFIG_DIR
+
+# No target produces a file of its own name; mark them phony so a stray file
+# called e.g. "small_bench" can never make a target look up to date.
+.PHONY: small_bench realistic_bench standard_bench
+
+# Small configuration: 2 blocks, 10 transactions per block, 10 public keys.
+small_bench:
+	@echo "Small benchmark configuration:"
+	@BLOCKS=2 TXNS_PER_BLOCK=10 NUM_PUB_KEYS=10 $(MAKE) standard_bench
+
+# Large configuration: 100 blocks, 1000 transactions per block, 1M public keys.
+realistic_bench:
+	@echo "Realistic benchmark configuration:"
+	@BLOCKS=100 TXNS_PER_BLOCK=1000 NUM_PUB_KEYS=1000000 $(MAKE) standard_bench
+
+# Runs prover_bench with the settings currently in effect. The recipe first
+# moves two directories up from the working directory before invoking cargo.
+standard_bench:
+	@echo "Running benchmark with $(BLOCKS) transaction blocks, $(TXNS_PER_BLOCK) transactions per block, and $(NUM_PUB_KEYS) public keys"
+	@cd ../.. && cargo bench --features="bench" --bench=prover_bench
diff --git a/examples/demo-rollup/benches/prover/README.md b/examples/demo-rollup/benches/prover/README.md
@@ -1,17 +1,22 @@
 # Prover Benchmarks
-* For benchmarking the prover, we measure the number of risc0 vm cycles for each of the major functions.
-* The reason for using the cycles is the assumption that proving works off a cycles/second (KHz, MHz) based on the hardware used
+
+- For benchmarking the prover, we measure the number of risc0 vm cycles for each of the major functions.
+- The reason for using the cycles is the assumption that proving works off a cycles/second (KHz, MHz) based on the hardware used
 
 ## Running the bench
-* From sovereign-sdk
+
+- From sovereign-sdk
+
 ```
 $ cd examples/demo-rollup/benches/prover
 $ cargo bench --features bench --bench prover_bench
 ```
 
 ## Methodology
-* We have `cycle_tracker` macro defined which can be used to annotate a function in zk that we want to measure the cycles for
-* The `cycle_tracker` macro is defined at `sovereign-sdk/zk-cycle-util`
+
+- We define a `cycle_tracker` macro that can be used to annotate any function in zk whose cycles we want to measure
+- The `cycle_tracker` macro is defined at `sovereign-sdk/zk-cycle-util`
+
 ```rust
     #[cfg_attr(all(target_os = "zkvm", feature="bench"), cycle_tracker)]
     fn begin_slot(&mut self, witness: Self::Witness) {
@@ -21,7 +26,9 @@ $ cargo bench --features bench --bench prover_bench
         ));
     }
 ```
-* The method we use to track metrics is by registering the `io_callback` syscall when creating the risc0 host.
+
+- We track metrics by registering the `io_callback` syscall when creating the risc0 host.
+
 ```
 pub fn get_syscall_name_handler() -> (SyscallName, fn(&[u8]) -> Vec<u8>) {
     let cycle_string = "cycle_metrics\0";
@@ -48,35 +55,41 @@ pub fn get_syscall_name_handler() -> (SyscallName, fn(&[u8]) -> Vec<u8>) {
     default_env.io_callback(metrics_syscall_name, metrics_callback);
 }
 ```
-* The above allows us to use `risc0_zkvm::guest::env::send_recv_slice` which lets the guest pass a slice of raw bytes to host and get back a vector of bytes
-* We use it to pass cycle metrics to the host
-* Cycles are tracked by the macro which gets a cycle count at the beginning and end of the function
+
+- The above allows us to use `risc0_zkvm::guest::env::send_recv_slice` which lets the guest pass a slice of raw bytes to host and get back a vector of bytes
+- We use it to pass cycle metrics to the host
+- Cycles are tracked by the macro which gets a cycle count at the beginning and end of the function
+
 ```rust
 let before = risc0_zkvm::guest::env::get_cycle_count();
 let result = (|| #block)();
 let after = risc0_zkvm::guest::env::get_cycle_count();
 ```
-* We feature gate the application of the macro `cycle_tracker` with both the target_os set to `zkvm` and the feature flag `bench`
-* The reason for using both is that we need conditional compilation to work in all cases
-* For the purpose of this profiling we run the prover without generating the proof
+
+- We feature gate the application of the macro `cycle_tracker` with both the target_os set to `zkvm` and the feature flag `bench`
+- The reason for using both is that we need conditional compilation to work in all cases
+- For the purpose of this profiling we run the prover without generating the proof
 
 ## Input set
-* Unlike demo-prover it's harder to generate fake data since all the proofs and checks need to succeed. 
-* This means the DA samples, hashes, signatures etc need to succeed
-* To make this easier we use a static input set consisting of 3 blocks
-  * we avoid using empty blocks because they skew average metrics
-  * we have 3 blocks
-    * block 1 -> 1 blob containing 1 create token transaction
-    * block 2 -> 1 blob containing 1 transfer transaction
-    * block 3 -> 1 blob containing 2 transfer transactions
-* This dataset is stored at `demo-prover/benches/blocks.hex`
-* The dataset can be substituted with another valid dataset as well from Celestia (TBD: automate parametrized generation of blocks.hex)
-* We can run this on different kinds of workloads to gauge the efficiency of different parts of the code
+
+- Unlike demo-prover it's harder to generate fake data since all the proofs and checks need to succeed.
+- This means the DA samples, hashes, signatures etc need to succeed
+- To make this easier we use a static input set consisting of 3 blocks
+  - we avoid using empty blocks because they skew average metrics
+  - we have 3 blocks
+    - block 1 -> 1 blob containing 1 create token transaction
+    - block 2 -> 1 blob containing 1 transfer transaction
+    - block 3 -> 1 blob containing 2 transfer transactions
+- This dataset is stored at `demo-prover/benches/blocks.hex`
+- The dataset can be substituted with another valid dataset as well from Celestia (TBD: automate parametrized generation of blocks.hex)
+- We can run this on different kinds of workloads to gauge the efficiency of different parts of the code
 
 ## Result
-* Standard hash function patched with risc0/rust_crypto
-* Signature verification currently NOT patched (TBD)
-* Signature verification takes about 60% of the total cycles
+
+- Standard hash function patched with risc0/rust_crypto
+- Signature verification currently NOT patched (TBD)
+- Signature verification takes about 60% of the total cycles
+
 ```
 Block stats
 
@@ -114,37 +127,46 @@ Cycle Metrics
 ```
 
 ## Custom annotations
-* We can also get finer grained information by annotating low level functions, but the process for this isn't straightforward. 
-* For code that we control, it's as simple as adding the `cycle_tracker` annotation to our function and then feature gating it (not feature gating it causes compilation errors)
-* For external dependencies, we need to fork and include a path dependency locally after annotating
-* We did this for the `jmt` jellyfish merkle tree library to measure cycle gains when we use the risc0 accelerated sha function vs without
-* We apply the risc0 patch in the following way in demo-prover/methods/guest/Cargo.toml
+
+- We can also get finer grained information by annotating low level functions, but the process for this isn't straightforward.
+- For code that we control, it's as simple as adding the `cycle_tracker` annotation to our function and then feature gating it (not feature gating it causes compilation errors)
+- For external dependencies, we need to fork and include a path dependency locally after annotating
+- We did this for the `jmt` jellyfish merkle tree library to measure cycle gains when we use the risc0 accelerated sha function vs without
+- We apply the risc0 patch in the following way in demo-prover/methods/guest/Cargo.toml
+
 ```yaml
 [patch.crates-io]
 sha2 = { git = "https://github.com/risc0/RustCrypto-hashes", tag = "sha2/v0.10.6-risc0" }
 ```
-* Note that the specific tag needs to be pointed to, since master and other branches don't contain acceleration
+
+- Note that the specific tag needs to be pointed to, since master and other branches don't contain acceleration
 
 ## Accelerated vs Non-accelerated libs
-* Accelerated and risc0 optimized crypto libraries give a significant (nearly 10x) cycle gain
-* With sha2 acceleration
+
+- Accelerated and risc0 optimized crypto libraries give a significant (nearly 10x) cycle gain
+- With sha2 acceleration
+
 ```
 =====> hash: 1781
 =====> hash: 1781
 =====> hash: 1781
 =====> hash: 1781
 =====> hash: 1781
 ```
-* Without sha2 acceleration
+
+- Without sha2 acceleration
+
 ```
 =====> hash: 13901
 =====> hash: 13901
 =====> hash: 13901
 =====> hash: 13901
 =====> hash: 13901
 ```
-* Overall performance difference when using sha acceleration vs without for the same dataset (3 blocks, 4 transactions) as described above
-* With sha acceleration
+
+- Overall performance difference when using sha acceleration vs without for the same dataset (3 blocks, 4 transactions) as described above
+- With sha acceleration
+
 ```
 +-------------------------+----------------+-----------+
 | Function                | Average Cycles | Num Calls |
@@ -157,7 +179,9 @@ sha2 = { git = "https://github.com/risc0/RustCrypto-hashes", tag = "sha2/v0.10.6
 +-------------------------+----------------+-----------+
 Total cycles consumed for test: 20834815
 ```
-* Without sha acceleration
+
+- Without sha acceleration
+
 ```
 +-------------------------+----------------+-----------+
 | Function                | Average Cycles | Num Calls |
@@ -170,10 +194,12 @@ Total cycles consumed for test: 20834815
 +-------------------------+----------------+-----------+
 Total cycles consumed for test: 26152702
 ```
-* There's an overall efficiency of 6 million cycles in total for 3 blocks. 
-* Keep in mind that the above table shows average number of cycles per call, so they give an efficiency per call, but the "Total cycles consumed for test" metric at the bottom shows total for 3 blocks
 
-* With ed25519 acceleration
+- Overall, sha acceleration saves roughly 5.3 million cycles in total across the 3 blocks (26,152,702 vs 20,834,815).
+- Keep in mind that the tables above show the average number of cycles per call, so they give the saving per call, whereas the "Total cycles consumed for test" metric at the bottom shows the total for all 3 blocks
+
+- With ed25519 acceleration
+
 ```
 +----------------------+---------------------+----------------------+----------+-----------+
 | Function             | Avg Cycles w/o Accel | Avg Cycles w/ Accel | % Change | Num Calls |
@@ -196,10 +222,91 @@ Total cycles consumed for test: 26152702
 +----------------------+----------------------+---------------------+----------+-----------+
 
 ```
-* We can see a ~4x speedup for the `verify` function when using risc0 accelerated ed25519-dalek patch
+
+- We can see a ~4x speedup for the `verify` function when using the risc0-accelerated ed25519-dalek patch
+
 ```
 [patch.crates-io]
 sha2 = { git = "https://github.com/risc0/RustCrypto-hashes", tag = "sha2/v0.10.6-risc0" }
 ed25519-dalek = { git = "https://github.com/risc0/curve25519-dalek", tag = "curve25519-4.1.0-risczero.1" }
 crypto-bigint = {git = "https://github.com/risc0/RustCrypto-crypto-bigint", tag = "v0.5.2-risc0"}
-```
+```
+
+## Augmented input set
+
+- In order to increase the accuracy of the benchmarks and get estimates closer to real use-cases, we have integrated the data-generation module `sov-data-generators`, which makes it easier to generate transaction data. We have also added cycle-tracking methods to get a finer understanding of the system's performance.
+
+For our benchmark, we have used two block types:
+
+- block 1 -> 1 blob containing 1 create token transaction
+- block 2 -> 1 blob containing 100 transfer transactions to random addresses, repeated 10 times
+
+Here are the results (including ed25519 acceleration):
+
+### Block Stats
+
+| Description                              | Value |
+| ---------------------------------------- | ----- |
+| Total blocks                             | 11    |
+| Blocks with transactions                 | 11    |
+| Number of blobs                          | 11    |
+| Total number of transactions             | 1001  |
+| Average number of transactions per block | 91    |
+
+### Cycle Metrics
+
+| Function             | Average Cycles | Num Calls |
+| -------------------- | -------------- | --------- |
+| Cycles per block     | 78,058,312     | 11        |
+| apply_blob           | 74,186,372     | 11        |
+| pre_process_batch    | 71,891,297     | 11        |
+| verify_txs_stateless | 71,555,628     | 11        |
+| apply_txs            | 2,258,064      | 11        |
+| end_slot             | 2,008,051      | 11        |
+| jmt_verify_update    | 1,086,936      | 11        |
+| jmt_verify_existence | 792,805        | 11        |
+| verify               | 734,681        | 1001      |
+| decode_txs           | 238,998        | 11        |
+| begin_slot           | 98,566         | 11        |
+| deserialize_batch    | 88,472         | 11        |
+| deserialize          | 23,515         | 1001      |
+| hash                 | 5,556          | 1001      |
+| commit               | 7              | 11        |
+
+**Total cycles consumed for test: 858,641,427**
+
+## Benchmarks with prepopulated accounts
+
+Now we compare these results against a run in which the accounts module is prepopulated with 1M accounts.
+
+### Block Stats
+
+| Description                              | Value |
+| ---------------------------------------- | ----- |
+| Total blocks                             | 11    |
+| Blocks with transactions                 | 11    |
+| Number of blobs                          | 11    |
+| Total number of transactions             | 1001  |
+| Average number of transactions per block | 91    |
+
+### Cycle Metrics
+
+| Function             | Average Cycles | Num Calls |
+| -------------------- | -------------- | --------- |
+| Cycles per block     | 82,501,342     | 11        |
+| apply_blob           | 73,774,539     | 11        |
+| pre_process_batch    | 71,614,640     | 11        |
+| verify_txs_stateless | 71,203,340     | 11        |
+| end_slot             | 5,277,919      | 11        |
+| jmt_verify_update    | 3,007,153      | 11        |
+| jmt_verify_existence | 2,143,099      | 11        |
+| apply_txs            | 2,120,704      | 11        |
+| verify               | 731,327        | 1001      |
+| decode_txs           | 308,557        | 11        |
+| begin_slot           | 184,097        | 11        |
+| deserialize_batch    | 82,908         | 11        |
+| deserialize          | 24,004         | 1001      |
+| hash                 | 5,852          | 1001      |
+| commit               | 7              | 11        |
+
+**Total cycles consumed for test: 907,514,763**