From b5490a04d3b73e520de9bdae0d132fa35a885665 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Mon, 28 Oct 2024 18:31:35 +0200 Subject: [PATCH] test(vm): Improve instruction-counting VM benchmark (#3105) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What ❔ Replaces `iai` with an alternative; brushes up instruction counting in general. ## Why ❔ - The library currently used for the benchmark (`iai`) is unmaintained. - It doesn't work with newer valgrind versions. - It doesn't allow measuring parts of program execution, only the entire program run. ## Checklist - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [x] Tests for the changes have been added / updated. - [x] Documentation comments have been added / updated. - [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev lint`. --- .github/workflows/vm-perf-comparison.yml | 33 ++- .github/workflows/vm-perf-to-prometheus.yml | 4 +- Cargo.lock | 57 +++-- Cargo.toml | 4 +- core/lib/multivm/src/versions/vm_fast/mod.rs | 2 +- core/lib/vm_executor/src/batch/factory.rs | 2 +- core/tests/vm-benchmark/Cargo.toml | 4 +- core/tests/vm-benchmark/benches/iai.rs | 35 --- .../vm-benchmark/benches/instructions.rs | 206 ++++++++++++++++++ core/tests/vm-benchmark/src/bin/common/mod.rs | 54 ----- .../src/bin/compare_iai_results.rs | 108 --------- .../src/bin/iai_results_to_prometheus.rs | 52 ----- .../src/bin/instruction_counts.rs | 106 ++++++++- core/tests/vm-benchmark/src/criterion.rs | 6 +- core/tests/vm-benchmark/src/lib.rs | 2 +- core/tests/vm-benchmark/src/vm.rs | 114 ++++++---- 16 files changed, 446 insertions(+), 343 deletions(-) delete mode 100644 core/tests/vm-benchmark/benches/iai.rs create mode 100644 core/tests/vm-benchmark/benches/instructions.rs delete mode 100644 core/tests/vm-benchmark/src/bin/common/mod.rs delete mode 100644 core/tests/vm-benchmark/src/bin/compare_iai_results.rs delete mode 100644 core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs diff --git a/.github/workflows/vm-perf-comparison.yml b/.github/workflows/vm-perf-comparison.yml index 49830a30cc1e..3520419f1337 100644 --- a/.github/workflows/vm-perf-comparison.yml +++ b/.github/workflows/vm-perf-comparison.yml @@ -40,6 +40,8 @@ jobs: echo "SCCACHE_GCS_SERVICE_ACCOUNT=gha-ci-runners@matterlabs-infra.iam.gserviceaccount.com" >> .env echo "SCCACHE_GCS_RW_MODE=READ_WRITE" >> .env echo "RUSTC_WRAPPER=sccache" >> .env + # Set the minimum reported instruction count difference to reduce noise + echo "BENCHMARK_DIFF_THRESHOLD_PERCENT=2" >> .env - name: init run: | @@ -51,8 +53,8 @@ jobs: run: | ci_run zkstackup -g --local ci_run zkstack dev contracts --system-contracts - ci_run cargo bench --package vm-benchmark --bench iai | tee base-iai - ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee base-opcodes || touch base-opcodes + ci_run cargo bench --package vm-benchmark --bench instructions -- --verbose || echo "Instructions benchmark is missing" + ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee base-opcodes - name: checkout PR run: | @@ -60,24 +62,39 @@ jobs: - name: run benchmarks on PR shell: bash + id: comparison run: | ci_run zkstackup -g --local ci_run zkstack dev contracts --system-contracts - ci_run cargo bench --package vm-benchmark --bench iai | tee pr-iai - ci_run cargo run --package vm-benchmark --release --bin instruction_counts | tee pr-opcodes || touch pr-opcodes + ci_run cargo bench --package vm-benchmark --bench instructions -- --verbose + ci_run cargo bench --package vm-benchmark --bench instructions -- --print > instructions.log 2>/dev/null + # Output all lines from the benchmark result starting from the "## ..." comparison header. + # Since the output spans multiple lines, we use a heredoc declaration. EOF=$(dd if=/dev/urandom bs=15 count=1 status=none | base64) echo "speedup<<$EOF" >> $GITHUB_OUTPUT - ci_run cargo run --package vm-benchmark --release --bin compare_iai_results base-iai pr-iai base-opcodes pr-opcodes >> $GITHUB_OUTPUT + sed -n '/^## /,$p' instructions.log >> $GITHUB_OUTPUT + echo "$EOF" >> $GITHUB_OUTPUT + + ci_run cargo run --package vm-benchmark --release --bin instruction_counts -- --diff base-opcodes > opcodes.log + echo "opcodes<<$EOF" >> $GITHUB_OUTPUT + sed -n '/^## /,$p' opcodes.log >> $GITHUB_OUTPUT echo "$EOF" >> $GITHUB_OUTPUT - id: comparison - name: Comment on PR uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # v2.5.0 + if: steps.comparison.outputs.speedup != '' || steps.comparison.outputs.opcodes != '' with: message: | - ${{ steps.comparison.outputs.speedup == '' && '## No performance difference detected (anymore)' || '## Detected VM performance changes' }} ${{ steps.comparison.outputs.speedup }} + ${{ steps.comparison.outputs.opcodes }} comment_tag: vm-performance-changes mode: recreate - create_if_not_exists: ${{ steps.comparison.outputs.speedup != '' }} + create_if_not_exists: true + - name: Remove PR comment + uses: thollander/actions-comment-pull-request@fabd468d3a1a0b97feee5f6b9e499eab0dd903f6 # v2.5.0 + if: steps.comparison.outputs.speedup == '' && steps.comparison.outputs.opcodes == '' + with: + comment_tag: vm-performance-changes + message: 'No performance difference detected (anymore)' + mode: delete diff --git a/.github/workflows/vm-perf-to-prometheus.yml b/.github/workflows/vm-perf-to-prometheus.yml index d336a1472e4a..93d33116794f 100644 --- a/.github/workflows/vm-perf-to-prometheus.yml +++ b/.github/workflows/vm-perf-to-prometheus.yml @@ -48,5 +48,5 @@ jobs: ci_run cargo bench --package vm-benchmark --bench oneshot # Run only benches with 1,000 transactions per batch to not spend too much time ci_run cargo bench --package vm-benchmark --bench batch '/1000$' - ci_run cargo bench --package vm-benchmark --bench iai | tee iai-result - ci_run cargo run --package vm-benchmark --bin iai_results_to_prometheus --release < iai-result + ci_run cargo bench --package vm-benchmark --bench instructions -- --verbose + ci_run cargo bench --package vm-benchmark --bench instructions -- --print diff --git a/Cargo.lock b/Cargo.lock index 2e3a8a1c3e3a..597da3c1b31b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -959,7 +959,7 @@ name = "block_reverter" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.18", + "clap 4.5.20", "serde_json", "tokio", "zksync_block_reverter", @@ -1445,9 +1445,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.18" +version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0956a43b323ac1afaffc053ed5c4b7c1f1800bacd1683c353aabbb752515dd3" +checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" dependencies = [ "clap_builder", "clap_derive", @@ -1455,14 +1455,15 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.18" +version = "4.5.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d72166dd41634086d5803a47eb71ae740e61d84709c36f3c34110173db3961b" +checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" dependencies = [ "anstream", "anstyle", "clap_lex 0.7.2", "strsim 0.11.1", + "terminal_size", ] [[package]] @@ -2796,7 +2797,7 @@ name = "genesis_generator" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.18", + "clap 4.5.20", "futures 0.3.30", "serde", "serde_json", @@ -3472,12 +3473,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "iai" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71a816c97c42258aa5834d07590b718b4c9a598944cd39a52dc25b351185d678" - [[package]] name = "iana-time-zone" version = "0.1.61" @@ -4122,7 +4117,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -4392,7 +4387,7 @@ name = "merkle_tree_consistency_checker" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.18", + "clap 4.5.20", "tracing", "zksync_config", "zksync_env_config", @@ -5498,7 +5493,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ "bytes", - "heck 0.5.0", + "heck 0.4.1", "itertools 0.12.1", "log", "multimap", @@ -6583,7 +6578,7 @@ name = "selector_generator" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.18", + "clap 4.5.20", "ethabi", "glob", "hex", @@ -7907,6 +7902,16 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "terminal_size" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f599bd7ca042cfdf8f4512b277c02ba102247820f9d9d4a9f521f496751a6ef" +dependencies = [ + "rustix", + "windows-sys 0.59.0", +] + [[package]] name = "test-casing" version = "0.1.3" @@ -8751,11 +8756,11 @@ version = "0.1.0" dependencies = [ "assert_matches", "criterion", - "iai", "once_cell", "rand 0.8.5", "tokio", "vise", + "yab", "zksync_contracts", "zksync_multivm", "zksync_types", @@ -9239,6 +9244,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "yab" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b06cc62d4cec617d3c259537be0fcaa8a5bcf72ddf2983823d9528605f36ed3" +dependencies = [ + "anes", + "clap 4.5.20", + "num_cpus", + "thiserror", +] + [[package]] name = "yansi" version = "1.0.1" @@ -10229,7 +10246,7 @@ dependencies = [ "anyhow", "assert_matches", "async-trait", - "clap 4.5.18", + "clap 4.5.20", "envy", "futures 0.3.30", "rustc_version", @@ -10437,7 +10454,7 @@ version = "0.1.0" dependencies = [ "anyhow", "assert_matches", - "clap 4.5.18", + "clap 4.5.20", "insta", "leb128", "once_cell", @@ -11017,7 +11034,7 @@ name = "zksync_server" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.18", + "clap 4.5.20", "futures 0.3.30", "serde_json", "tikv-jemallocator", diff --git a/Cargo.toml b/Cargo.toml index dc6fdf1727e3..6d51e5060aa8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -122,6 +122,7 @@ derive_more = "1.0.0" envy = "0.4" ethabi = "18.0.0" flate2 = "1.0.28" +fraction = "0.15.3" futures = "0.3" glob = "0.3" google-cloud-auth = "0.16.0" @@ -131,7 +132,6 @@ hex = "0.4" http = "1.1" httpmock = "0.7.0" hyper = "1.3" -iai = "0.1" insta = "1.29.0" itertools = "0.10" jsonrpsee = { version = "0.23", default-features = false } @@ -190,7 +190,7 @@ tracing-opentelemetry = "0.25.0" time = "0.3.36" # Has to be same as used by `tracing-subscriber` url = "2" web3 = "0.19.0" -fraction = "0.15.3" +yab = "0.1.0" # Proc-macro syn = "2.0" diff --git a/core/lib/multivm/src/versions/vm_fast/mod.rs b/core/lib/multivm/src/versions/vm_fast/mod.rs index bb5a342bff28..35789c6cdc9a 100644 --- a/core/lib/multivm/src/versions/vm_fast/mod.rs +++ b/core/lib/multivm/src/versions/vm_fast/mod.rs @@ -1,4 +1,4 @@ -pub use zksync_vm2::interface::Tracer; +pub use zksync_vm2::interface; pub use self::{circuits_tracer::CircuitsTracer, vm::Vm}; diff --git a/core/lib/vm_executor/src/batch/factory.rs b/core/lib/vm_executor/src/batch/factory.rs index f974d17f4a75..de0db5f0bf75 100644 --- a/core/lib/vm_executor/src/batch/factory.rs +++ b/core/lib/vm_executor/src/batch/factory.rs @@ -37,7 +37,7 @@ pub trait BatchTracer: fmt::Debug + 'static + Send + Sealed { const TRACE_CALLS: bool; /// Tracer for the fast VM. #[doc(hidden)] - type Fast: vm_fast::Tracer + Default + 'static; + type Fast: vm_fast::interface::Tracer + Default + 'static; } impl Sealed for () {} diff --git a/core/tests/vm-benchmark/Cargo.toml b/core/tests/vm-benchmark/Cargo.toml index 59c1e21493b4..892bcf1c1051 100644 --- a/core/tests/vm-benchmark/Cargo.toml +++ b/core/tests/vm-benchmark/Cargo.toml @@ -21,7 +21,7 @@ tokio.workspace = true [dev-dependencies] assert_matches.workspace = true -iai.workspace = true +yab.workspace = true [[bench]] name = "oneshot" @@ -32,5 +32,5 @@ name = "batch" harness = false [[bench]] -name = "iai" +name = "instructions" harness = false diff --git a/core/tests/vm-benchmark/benches/iai.rs b/core/tests/vm-benchmark/benches/iai.rs deleted file mode 100644 index 8cbb9f10dd83..000000000000 --- a/core/tests/vm-benchmark/benches/iai.rs +++ /dev/null @@ -1,35 +0,0 @@ -use iai::black_box; -use vm_benchmark::{BenchmarkingVm, BenchmarkingVmFactory, Bytecode, Fast, Legacy}; - -fn run_bytecode(name: &str) { - let tx = Bytecode::get(name).deploy_tx(); - black_box(BenchmarkingVm::::default().run_transaction(&tx)); -} - -macro_rules! make_functions_and_main { - ($($file:ident => $legacy_name:ident,)+) => { - $( - fn $file() { - run_bytecode::(stringify!($file)); - } - - fn $legacy_name() { - run_bytecode::(stringify!($file)); - } - )+ - - iai::main!($($file, $legacy_name,)+); - }; -} - -make_functions_and_main!( - access_memory => access_memory_legacy, - call_far => call_far_legacy, - decode_shl_sub => decode_shl_sub_legacy, - deploy_simple_contract => deploy_simple_contract_legacy, - finish_eventful_frames => finish_eventful_frames_legacy, - write_and_decode => write_and_decode_legacy, - event_spam => event_spam_legacy, - slot_hash_collision => slot_hash_collision_legacy, - heap_read_write => heap_read_write_legacy, -); diff --git a/core/tests/vm-benchmark/benches/instructions.rs b/core/tests/vm-benchmark/benches/instructions.rs new file mode 100644 index 000000000000..654dfef71b29 --- /dev/null +++ b/core/tests/vm-benchmark/benches/instructions.rs @@ -0,0 +1,206 @@ +//! Measures the number of host instructions required to run the benchmark bytecodes. + +use std::{env, sync::mpsc}; + +use vise::{Gauge, LabeledFamily, Metrics}; +use vm_benchmark::{ + criterion::PrometheusRuntime, BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, BYTECODES, +}; +use yab::{ + reporter::{BenchmarkOutput, BenchmarkReporter, Reporter}, + AccessSummary, BenchMode, Bencher, BenchmarkId, +}; + +fn benchmarks_for_vm(bencher: &mut Bencher) { + bencher.bench( + BenchmarkId::new("init", VM::LABEL.as_str()), + BenchmarkingVm::::default, + ); + + for bytecode in BYTECODES { + bencher.bench_with_capture( + BenchmarkId::new(bytecode.name, VM::LABEL.as_str()), + |capture| { + let mut vm = yab::black_box(BenchmarkingVm::::default()); + let tx = yab::black_box(bytecode.deploy_tx()); + capture.measure(|| vm.run_transaction(&tx)); + }, + ); + } +} + +/// Reporter that pushes cachegrind metrics to Prometheus. +#[derive(Debug)] +struct MetricsReporter { + _runtime: Option, +} + +impl Default for MetricsReporter { + fn default() -> Self { + Self { + _runtime: PrometheusRuntime::new(), + } + } +} + +impl Reporter for MetricsReporter { + fn new_benchmark(&mut self, id: &BenchmarkId) -> Box { + Box::new(MetricsBenchmarkReporter(id.clone())) + } +} + +#[derive(Debug)] +struct MetricsBenchmarkReporter(BenchmarkId); + +impl BenchmarkReporter for MetricsBenchmarkReporter { + fn ok(self: Box, output: &BenchmarkOutput) { + #[derive(Debug, Metrics)] + #[metrics(prefix = "vm_cachegrind")] + struct VmCachegrindMetrics { + #[metrics(labels = ["benchmark"])] + instructions: LabeledFamily>, + #[metrics(labels = ["benchmark"])] + l1_accesses: LabeledFamily>, + #[metrics(labels = ["benchmark"])] + l2_accesses: LabeledFamily>, + #[metrics(labels = ["benchmark"])] + ram_accesses: LabeledFamily>, + #[metrics(labels = ["benchmark"])] + cycles: LabeledFamily>, + } + + #[vise::register] + static VM_CACHEGRIND_METRICS: vise::Global = vise::Global::new(); + + let id = self.0.to_string(); + VM_CACHEGRIND_METRICS.instructions[&id].set(output.stats.total_instructions()); + if let Some(&full) = output.stats.as_full() { + let summary = AccessSummary::from(full); + VM_CACHEGRIND_METRICS.l1_accesses[&id].set(summary.l1_hits); + VM_CACHEGRIND_METRICS.l2_accesses[&id].set(summary.l3_hits); + VM_CACHEGRIND_METRICS.ram_accesses[&id].set(summary.ram_accesses); + VM_CACHEGRIND_METRICS.cycles[&id].set(summary.estimated_cycles()); + } + } +} + +#[derive(Debug, Clone, Copy)] +struct Comparison { + current_cycles: u64, + prev_cycles: Option, +} + +impl Comparison { + fn percent_difference(a: u64, b: u64) -> f64 { + ((b as i64) - (a as i64)) as f64 / (a as f64) * 100.0 + } + + fn new(output: &BenchmarkOutput) -> Option { + let current_cycles = AccessSummary::from(*output.stats.as_full()?).estimated_cycles(); + let prev_cycles = if let Some(prev_stats) = &output.prev_stats { + Some(AccessSummary::from(*prev_stats.as_full()?).estimated_cycles()) + } else { + None + }; + + Some(Self { + current_cycles, + prev_cycles, + }) + } + + fn cycles_diff(&self) -> Option { + self.prev_cycles + .map(|prev_cycles| Self::percent_difference(prev_cycles, self.current_cycles)) + } +} + +/// Reporter that outputs diffs in a Markdown table to stdout after all benchmarks are completed. +/// +/// Significant diff level can be changed via `BENCHMARK_DIFF_THRESHOLD_PERCENT` env var; it is set to 1% by default. +#[derive(Debug)] +struct ComparisonReporter { + comparisons_sender: mpsc::Sender<(String, Comparison)>, + comparisons_receiver: mpsc::Receiver<(String, Comparison)>, +} + +impl Default for ComparisonReporter { + fn default() -> Self { + let (comparisons_sender, comparisons_receiver) = mpsc::channel(); + Self { + comparisons_sender, + comparisons_receiver, + } + } +} + +impl Reporter for ComparisonReporter { + fn new_benchmark(&mut self, id: &BenchmarkId) -> Box { + Box::new(BenchmarkComparison { + comparisons_sender: self.comparisons_sender.clone(), + id: id.clone(), + }) + } + + fn ok(self: Box) { + const ENV_VAR: &str = "BENCHMARK_DIFF_THRESHOLD_PERCENT"; + + let diff_threshold = env::var(ENV_VAR).unwrap_or_else(|_| "1.0".into()); + let diff_threshold: f64 = diff_threshold.parse().unwrap_or_else(|err| { + panic!("incorrect `{ENV_VAR}` value: {err}"); + }); + + // Drop the sender to not hang on the iteration below. + drop(self.comparisons_sender); + let mut comparisons: Vec<_> = self.comparisons_receiver.iter().collect(); + comparisons.retain(|(_, diff)| { + // Output all stats if `diff_threshold <= 0.0` since this is what the user expects + diff.cycles_diff().unwrap_or(0.0) >= diff_threshold + }); + if comparisons.is_empty() { + return; + } + + comparisons.sort_unstable_by(|(name, _), (other_name, _)| name.cmp(other_name)); + + println!("\n## Detected VM performance changes"); + println!("Benchmark name | Est. cycles | Change in est. cycles |"); + println!("|:---|---:|---:|"); + for (name, comparison) in &comparisons { + let diff = comparison + .cycles_diff() + .map_or_else(|| "N/A".to_string(), |diff| format!("{diff:+.1}%")); + println!("| {name} | {} | {diff} |", comparison.current_cycles); + } + } +} + +#[derive(Debug)] +struct BenchmarkComparison { + comparisons_sender: mpsc::Sender<(String, Comparison)>, + id: BenchmarkId, +} + +impl BenchmarkReporter for BenchmarkComparison { + fn ok(self: Box, output: &BenchmarkOutput) { + if let Some(diff) = Comparison::new(output) { + self.comparisons_sender + .send((self.id.to_string(), diff)) + .ok(); + } + } +} + +fn benchmarks(bencher: &mut Bencher) { + if bencher.mode() == BenchMode::PrintResults { + // Only customize reporting if outputting previously collected benchmark result in order to prevent + // reporters influencing cachegrind stats. + bencher + .add_reporter(MetricsReporter::default()) + .add_reporter(ComparisonReporter::default()); + } + benchmarks_for_vm::(bencher); + benchmarks_for_vm::(bencher); +} + +yab::main!(benchmarks); diff --git a/core/tests/vm-benchmark/src/bin/common/mod.rs b/core/tests/vm-benchmark/src/bin/common/mod.rs deleted file mode 100644 index a92c9d5f710c..000000000000 --- a/core/tests/vm-benchmark/src/bin/common/mod.rs +++ /dev/null @@ -1,54 +0,0 @@ -use std::io::BufRead; - -#[derive(Debug)] -pub struct IaiResult { - pub name: String, - pub instructions: u64, - pub l1_accesses: u64, - pub l2_accesses: u64, - pub ram_accesses: u64, - pub cycles: u64, -} - -pub fn parse_iai(iai_output: R) -> impl Iterator { - IaiResultParser { - lines: iai_output.lines().map(|x| x.unwrap()), - } -} - -struct IaiResultParser> { - lines: I, -} - -impl> Iterator for IaiResultParser { - type Item = IaiResult; - - fn next(&mut self) -> Option { - self.lines.next().map(|name| { - let result = IaiResult { - name, - instructions: self.parse_stat(), - l1_accesses: self.parse_stat(), - l2_accesses: self.parse_stat(), - ram_accesses: self.parse_stat(), - cycles: self.parse_stat(), - }; - self.lines.next(); - result - }) - } -} - -impl> IaiResultParser { - fn parse_stat(&mut self) -> u64 { - let line = self.lines.next().unwrap(); - let number = line - .split(':') - .nth(1) - .unwrap() - .split_whitespace() - .next() - .unwrap(); - number.parse().unwrap() - } -} diff --git a/core/tests/vm-benchmark/src/bin/compare_iai_results.rs b/core/tests/vm-benchmark/src/bin/compare_iai_results.rs deleted file mode 100644 index c274b039c9bd..000000000000 --- a/core/tests/vm-benchmark/src/bin/compare_iai_results.rs +++ /dev/null @@ -1,108 +0,0 @@ -use std::{ - collections::{HashMap, HashSet}, - fs::File, - io::{BufRead, BufReader}, -}; - -pub use crate::common::parse_iai; - -mod common; - -fn main() { - let [iai_before, iai_after, opcodes_before, opcodes_after] = std::env::args() - .skip(1) - .take(4) - .collect::>() - .try_into() - .expect("expected four arguments"); - - let iai_before = get_name_to_cycles(&iai_before); - let iai_after = get_name_to_cycles(&iai_after); - let opcodes_before = get_name_to_opcodes(&opcodes_before); - let opcodes_after = get_name_to_opcodes(&opcodes_after); - - let perf_changes = iai_before - .keys() - .collect::>() - .intersection(&iai_after.keys().collect()) - .map(|&name| (name, percent_difference(iai_before[name], iai_after[name]))) - .collect::>(); - - let duration_changes = opcodes_before - .keys() - .collect::>() - .intersection(&opcodes_after.keys().collect()) - .map(|&name| { - let opcodes_abs_diff = (opcodes_after[name] as i64) - (opcodes_before[name] as i64); - (name, opcodes_abs_diff) - }) - .collect::>(); - - let mut nonzero_diff = false; - - for name in perf_changes - .iter() - .filter_map(|(key, value)| (value.abs() > 2.).then_some(key)) - .collect::>() - .union( - &duration_changes - .iter() - .filter_map(|(key, value)| (*value != 0).then_some(key)) - .collect(), - ) - { - // write the header before writing the first line of diff - if !nonzero_diff { - println!("Benchmark name | change in estimated runtime | change in number of opcodes executed \n--- | --- | ---"); - nonzero_diff = true; - } - - let n_a = "N/A".to_string(); - println!( - "{} | {} | {}", - name, - perf_changes - .get(**name) - .map(|percent| format!("{:+.1}%", percent)) - .unwrap_or(n_a.clone()), - duration_changes - .get(**name) - .map(|abs_diff| format!( - "{:+} ({:+.1}%)", - abs_diff, - percent_difference(opcodes_before[**name], opcodes_after[**name]) - )) - .unwrap_or(n_a), - ); - } - - if nonzero_diff { - println!("\n Changes in number of opcodes executed indicate that the gas price of the benchmark has changed, which causes it run out of gas at a different time. Or that it is behaving completely differently."); - } -} - -fn percent_difference(a: u64, b: u64) -> f64 { - ((b as f64) - (a as f64)) / (a as f64) * 100.0 -} - -fn get_name_to_cycles(filename: &str) -> HashMap { - parse_iai(BufReader::new( - File::open(filename).expect("failed to open file"), - )) - .map(|x| (x.name, x.cycles)) - .collect() -} - -fn get_name_to_opcodes(filename: &str) -> HashMap { - BufReader::new(File::open(filename).expect("failed to open file")) - .lines() - .map(|line| { - let line = line.unwrap(); - let mut it = line.split_whitespace(); - ( - it.next().unwrap().to_string(), - it.next().unwrap().parse().unwrap(), - ) - }) - .collect() -} diff --git a/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs b/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs deleted file mode 100644 index 3b3aa05bf69c..000000000000 --- a/core/tests/vm-benchmark/src/bin/iai_results_to_prometheus.rs +++ /dev/null @@ -1,52 +0,0 @@ -use std::{env, io::BufReader, time::Duration}; - -use tokio::sync::watch; -use vise::{Gauge, LabeledFamily, Metrics}; -use zksync_vlog::prometheus::PrometheusExporterConfig; - -use crate::common::{parse_iai, IaiResult}; - -mod common; - -#[derive(Debug, Metrics)] -#[metrics(prefix = "vm_cachegrind")] -pub(crate) struct VmCachegrindMetrics { - #[metrics(labels = ["benchmark"])] - pub instructions: LabeledFamily>, - #[metrics(labels = ["benchmark"])] - pub l1_accesses: LabeledFamily>, - #[metrics(labels = ["benchmark"])] - pub l2_accesses: LabeledFamily>, - #[metrics(labels = ["benchmark"])] - pub ram_accesses: LabeledFamily>, - #[metrics(labels = ["benchmark"])] - pub cycles: LabeledFamily>, -} - -#[vise::register] -pub(crate) static VM_CACHEGRIND_METRICS: vise::Global = vise::Global::new(); - -#[tokio::main] -async fn main() { - let results: Vec = parse_iai(BufReader::new(std::io::stdin())).collect(); - - let endpoint = env::var("BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL") - .expect("`BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL` env var is not set"); - let (stop_sender, stop_receiver) = watch::channel(false); - let prometheus_config = - PrometheusExporterConfig::push(endpoint.to_owned(), Duration::from_millis(100)); - tokio::spawn(prometheus_config.run(stop_receiver)); - - for result in results { - let name = result.name; - VM_CACHEGRIND_METRICS.instructions[&name.clone()].set(result.instructions); - VM_CACHEGRIND_METRICS.l1_accesses[&name.clone()].set(result.l1_accesses); - VM_CACHEGRIND_METRICS.l2_accesses[&name.clone()].set(result.l2_accesses); - VM_CACHEGRIND_METRICS.ram_accesses[&name.clone()].set(result.ram_accesses); - VM_CACHEGRIND_METRICS.cycles[&name].set(result.cycles); - } - - println!("Waiting for push to happen..."); - tokio::time::sleep(Duration::from_secs(1)).await; - stop_sender.send_replace(true); -} diff --git a/core/tests/vm-benchmark/src/bin/instruction_counts.rs b/core/tests/vm-benchmark/src/bin/instruction_counts.rs index 96208007fd97..ece30a66cee3 100644 --- a/core/tests/vm-benchmark/src/bin/instruction_counts.rs +++ b/core/tests/vm-benchmark/src/bin/instruction_counts.rs @@ -1,16 +1,100 @@ //! Runs all benchmarks and prints out the number of zkEVM opcodes each one executed. -use vm_benchmark::{BenchmarkingVmFactory, Fast, Legacy, BYTECODES}; +use std::{collections::BTreeMap, env, fs, io, path::PathBuf}; -fn main() { - for bytecode in BYTECODES { - let tx = bytecode.deploy_tx(); - let name = bytecode.name; - println!("{name} {}", Fast::<()>::count_instructions(&tx)); - println!( - "{} {}", - name.to_string() + "_legacy", - Legacy::count_instructions(&tx) - ); +use vm_benchmark::{CountInstructions, Fast, Legacy, BYTECODES}; + +#[derive(Debug)] +enum Command { + Print, + Diff { old: PathBuf }, +} + +impl Command { + fn from_env() -> Self { + let mut args = env::args().skip(1); + let Some(first) = args.next() else { + return Self::Print; + }; + assert_eq!(first, "--diff", "Unsupported command-line arg"); + let old = args.next().expect("`--diff` requires a path to old file"); + Self::Diff { old: old.into() } } + + fn print_instructions(counts: &BTreeMap<&str, usize>) { + for (bytecode_name, count) in counts { + println!("{bytecode_name} {count}"); + } + } + + fn parse_counts(reader: impl io::BufRead) -> BTreeMap { + let mut counts = BTreeMap::new(); + for line in reader.lines() { + let line = line.unwrap(); + if line.is_empty() { + continue; + } + let (name, count) = line.split_once(' ').expect("invalid output format"); + let count = count.parse().unwrap_or_else(|err| { + panic!("invalid count for `{name}`: {err}"); + }); + counts.insert(name.to_owned(), count); + } + counts + } + + fn run(self) { + let counts: BTreeMap<_, _> = BYTECODES + .iter() + .map(|bytecode| { + let tx = bytecode.deploy_tx(); + // We have a unit test comparing stats, but do it here as well just in case. + let fast_count = Fast::count_instructions(&tx); + let legacy_count = Legacy::count_instructions(&tx); + assert_eq!( + fast_count, legacy_count, + "mismatch on number of instructions on bytecode `{}`", + bytecode.name + ); + + (bytecode.name, fast_count) + }) + .collect(); + + match self { + Self::Print => Self::print_instructions(&counts), + Self::Diff { old } => { + let file = fs::File::open(&old).unwrap_or_else(|err| { + panic!("failed opening `{}`: {err}", old.display()); + }); + let reader = io::BufReader::new(file); + let old_counts = Self::parse_counts(reader); + + let differing_counts: Vec<_> = counts + .iter() + .filter_map(|(&name, &new_count)| { + let old_count = *old_counts.get(name)?; + (old_count != new_count).then_some((name, old_count, new_count)) + }) + .collect(); + + if !differing_counts.is_empty() { + println!("## ⚠ Detected differing instruction counts"); + println!("| Benchmark | Old count | New count |"); + println!("|-----------|----------:|----------:|"); + for (name, old_count, new_count) in differing_counts { + println!("| {name} | {old_count} | {new_count} |"); + } + println!( + "\nChanges in number of opcodes executed indicate that the gas price of the benchmark has changed, \ + which causes it to run out of gas at a different time." + ); + } + } + } + } +} + +fn main() { + Command::from_env().run(); } diff --git a/core/tests/vm-benchmark/src/criterion.rs b/core/tests/vm-benchmark/src/criterion.rs index 9515ac4ef988..024ccf14139f 100644 --- a/core/tests/vm-benchmark/src/criterion.rs +++ b/core/tests/vm-benchmark/src/criterion.rs @@ -57,7 +57,7 @@ struct VmBenchmarkMetrics { static METRICS: vise::Global = vise::Global::new(); #[derive(Debug)] -struct PrometheusRuntime { +pub struct PrometheusRuntime { stop_sender: watch::Sender, _runtime: tokio::runtime::Runtime, } @@ -72,7 +72,7 @@ impl Drop for PrometheusRuntime { } impl PrometheusRuntime { - fn new() -> Option { + pub fn new() -> Option { const PUSH_INTERVAL: Duration = Duration::from_millis(100); let gateway_url = env::var("BENCHMARK_PROMETHEUS_PUSHGATEWAY_URL").ok()?; @@ -164,7 +164,7 @@ thread_local! { static BIN_NAME: SyncOnceCell<&'static str> = SyncOnceCell::new(); -/// Measurement for criterion that exports . +/// Measurement for criterion that exports timing-related metrics. #[derive(Debug)] pub struct MeteredTime { _prometheus: Option, diff --git a/core/tests/vm-benchmark/src/lib.rs b/core/tests/vm-benchmark/src/lib.rs index 4bd008d33196..9c4f547c1de2 100644 --- a/core/tests/vm-benchmark/src/lib.rs +++ b/core/tests/vm-benchmark/src/lib.rs @@ -6,7 +6,7 @@ pub use crate::{ get_load_test_deploy_tx, get_load_test_tx, get_realistic_load_test_tx, get_transfer_tx, LoadTestParams, }, - vm::{BenchmarkingVm, BenchmarkingVmFactory, Fast, Legacy, VmLabel}, + vm::{BenchmarkingVm, BenchmarkingVmFactory, CountInstructions, Fast, Legacy, VmLabel}, }; pub mod criterion; diff --git a/core/tests/vm-benchmark/src/vm.rs b/core/tests/vm-benchmark/src/vm.rs index e198be9ea6b2..bf969e0de5c0 100644 --- a/core/tests/vm-benchmark/src/vm.rs +++ b/core/tests/vm-benchmark/src/vm.rs @@ -72,19 +72,21 @@ pub trait BenchmarkingVmFactory { system_env: SystemEnv, storage: &'static InMemoryStorage, ) -> Self::Instance; +} +pub trait CountInstructions { /// Counts instructions executed by the VM while processing the transaction. fn count_instructions(tx: &Transaction) -> usize; } /// Factory for the new / fast VM. #[derive(Debug)] -pub struct Fast(Tr); +pub struct Fast; -impl BenchmarkingVmFactory for Fast { +impl BenchmarkingVmFactory for Fast { const LABEL: VmLabel = VmLabel::Fast; - type Instance = vm_fast::Vm<&'static InMemoryStorage, Tr>; + type Instance = vm_fast::Vm<&'static InMemoryStorage>; fn create( batch_env: L1BatchEnv, @@ -93,27 +95,30 @@ impl BenchmarkingVmFactory for Fast ) -> Self::Instance { vm_fast::Vm::custom(batch_env, system_env, storage) } +} +impl CountInstructions for Fast { fn count_instructions(tx: &Transaction) -> usize { - let mut vm = BenchmarkingVm::>::default(); - vm.0.push_transaction(tx.clone()); + use vm_fast::interface as vm2; #[derive(Default)] struct InstructionCount(usize); - impl vm_fast::Tracer for InstructionCount { - fn before_instruction< - OP: zksync_vm2::interface::OpcodeType, - S: zksync_vm2::interface::GlobalStateInterface, - >( + + impl vm2::Tracer for InstructionCount { + fn before_instruction( &mut self, _: &mut S, ) { self.0 += 1; } } - let mut tracer = InstructionCount(0); - vm.0.inspect(&mut tracer, InspectExecutionMode::OneTx); + let (system_env, l1_batch_env) = test_env(); + let mut vm = + vm_fast::Vm::<_, InstructionCount>::custom(l1_batch_env, system_env, &*STORAGE); + vm.push_transaction(tx.clone()); + let mut tracer = InstructionCount(0); + vm.inspect(&mut tracer, InspectExecutionMode::OneTx); tracer.0 } } @@ -135,7 +140,9 @@ impl BenchmarkingVmFactory for Legacy { let storage = StorageView::new(storage).to_rc_ptr(); vm_latest::Vm::new(batch_env, system_env, storage) } +} +impl CountInstructions for Legacy { fn count_instructions(tx: &Transaction) -> usize { let mut vm = BenchmarkingVm::::default(); vm.0.push_transaction(tx.clone()); @@ -150,41 +157,44 @@ impl BenchmarkingVmFactory for Legacy { } } +fn test_env() -> (SystemEnv, L1BatchEnv) { + let timestamp = unix_timestamp_ms(); + let system_env = SystemEnv { + zk_porter_available: false, + version: ProtocolVersionId::latest(), + base_system_smart_contracts: SYSTEM_CONTRACTS.clone(), + bootloader_gas_limit: BATCH_COMPUTATIONAL_GAS_LIMIT, + execution_mode: TxExecutionMode::VerifyExecute, + default_validation_computational_gas_limit: BATCH_COMPUTATIONAL_GAS_LIMIT, + chain_id: L2ChainId::from(270), + }; + let l1_batch_env = L1BatchEnv { + previous_batch_hash: None, + number: L1BatchNumber(1), + timestamp, + fee_input: BatchFeeInput::l1_pegged( + 50_000_000_000, // 50 gwei + 250_000_000, // 0.25 gwei + ), + fee_account: Address::random(), + enforced_base_fee: None, + first_l2_block: L2BlockEnv { + number: 1, + timestamp, + prev_block_hash: L2BlockHasher::legacy_hash(L2BlockNumber(0)), + max_virtual_blocks_to_create: 100, + }, + }; + (system_env, l1_batch_env) +} + #[derive(Debug)] pub struct BenchmarkingVm(VM::Instance); impl Default for BenchmarkingVm { fn default() -> Self { - let timestamp = unix_timestamp_ms(); - Self(VM::create( - L1BatchEnv { - previous_batch_hash: None, - number: L1BatchNumber(1), - timestamp, - fee_input: BatchFeeInput::l1_pegged( - 50_000_000_000, // 50 gwei - 250_000_000, // 0.25 gwei - ), - fee_account: Address::random(), - enforced_base_fee: None, - first_l2_block: L2BlockEnv { - number: 1, - timestamp, - prev_block_hash: L2BlockHasher::legacy_hash(L2BlockNumber(0)), - max_virtual_blocks_to_create: 100, - }, - }, - SystemEnv { - zk_porter_available: false, - version: ProtocolVersionId::latest(), - base_system_smart_contracts: SYSTEM_CONTRACTS.clone(), - bootloader_gas_limit: BATCH_COMPUTATIONAL_GAS_LIMIT, - execution_mode: TxExecutionMode::VerifyExecute, - default_validation_computational_gas_limit: BATCH_COMPUTATIONAL_GAS_LIMIT, - chain_id: L2ChainId::from(270), - }, - &STORAGE, - )) + let (system_env, l1_batch_env) = test_env(); + Self(VM::create(l1_batch_env, system_env, &STORAGE)) } } @@ -231,7 +241,7 @@ mod tests { use super::*; use crate::{ get_deploy_tx, get_heavy_load_test_tx, get_load_test_deploy_tx, get_load_test_tx, - get_realistic_load_test_tx, get_transfer_tx, LoadTestParams, + get_realistic_load_test_tx, get_transfer_tx, LoadTestParams, BYTECODES, }; #[test] @@ -282,4 +292,22 @@ mod tests { let res = vm.run_transaction(&get_heavy_load_test_tx(1)); assert_matches!(res.result, ExecutionResult::Success { .. }); } + + #[test] + fn instruction_count_matches_on_both_vms_for_transfer() { + let tx = get_transfer_tx(0); + let legacy_count = Legacy::count_instructions(&tx); + let fast_count = Fast::count_instructions(&tx); + assert_eq!(legacy_count, fast_count); + } + + #[test] + fn instruction_count_matches_on_both_vms_for_benchmark_bytecodes() { + for bytecode in BYTECODES { + let tx = bytecode.deploy_tx(); + let legacy_count = Legacy::count_instructions(&tx); + let fast_count = Fast::count_instructions(&tx); + assert_eq!(legacy_count, fast_count, "bytecode: {}", bytecode.name); + } + } }