Add tests and calibration for new VM instantiation cost model

stellar · Mar 22, 2024 · b25cdac · b25cdac
1 parent 94165df
commit b25cdac
Show file tree

Hide file tree

Showing 10 changed files with 720 additions and 169 deletions.
diff --git a/soroban-env-host/benches/common/cost_types/vm_ops.rs b/soroban-env-host/benches/common/cost_types/vm_ops.rs
@@ -69,23 +69,22 @@ pub(crate) use v21::*;
 #[cfg(feature = "next")]
 mod v21 {
     use super::super::wasm_insn_exec::{
-        wasm_module_with_n_data_segments, wasm_module_with_n_elem_segments,
-        wasm_module_with_n_exports, wasm_module_with_n_globals, wasm_module_with_n_imports,
-        wasm_module_with_n_insns, wasm_module_with_n_internal_funcs,
-        wasm_module_with_n_memory_pages, wasm_module_with_n_table_entries,
-        wasm_module_with_n_types,
+        wasm_module_with_n_data_segment_bytes, wasm_module_with_n_data_segments,
+        wasm_module_with_n_elem_segments, wasm_module_with_n_exports, wasm_module_with_n_globals,
+        wasm_module_with_n_imports, wasm_module_with_n_insns, wasm_module_with_n_internal_funcs,
+        wasm_module_with_n_table_entries, wasm_module_with_n_types,
     };
     use super::*;
     use soroban_env_host::{
         cost_runner::{
-            InstantiateWasmDataSegmentsRun, InstantiateWasmElemSegmentsRun,
-            InstantiateWasmExportsRun, InstantiateWasmFunctionsRun, InstantiateWasmGlobalsRun,
-            InstantiateWasmImportsRun, InstantiateWasmInstructionsRun,
-            InstantiateWasmMemoryPagesRun, InstantiateWasmTableEntriesRun, InstantiateWasmTypesRun,
+            InstantiateWasmDataSegmentBytesRun, InstantiateWasmDataSegmentsRun,
+            InstantiateWasmElemSegmentsRun, InstantiateWasmExportsRun, InstantiateWasmFunctionsRun,
+            InstantiateWasmGlobalsRun, InstantiateWasmImportsRun, InstantiateWasmInstructionsRun,
+            InstantiateWasmTableEntriesRun, InstantiateWasmTypesRun, ParseWasmDataSegmentBytesRun,
             ParseWasmDataSegmentsRun, ParseWasmElemSegmentsRun, ParseWasmExportsRun,
             ParseWasmFunctionsRun, ParseWasmGlobalsRun, ParseWasmImportsRun,
-            ParseWasmInstructionsRun, ParseWasmMemoryPagesRun, ParseWasmTableEntriesRun,
-            ParseWasmTypesRun, VmCachedInstantiationRun, VmInstantiationSample,
+            ParseWasmInstructionsRun, ParseWasmTableEntriesRun, ParseWasmTypesRun,
+            VmCachedInstantiationRun, VmInstantiationSample,
         },
         xdr, Host,
     };
@@ -101,7 +100,7 @@ mod v21 {
     pub(crate) struct ParseWasmElemSegmentsMeasure;
     pub(crate) struct ParseWasmImportsMeasure;
     pub(crate) struct ParseWasmExportsMeasure;
-    pub(crate) struct ParseWasmMemoryPagesMeasure;
+    pub(crate) struct ParseWasmDataSegmentBytesMeasure;
 
     pub(crate) struct InstantiateWasmInstructionsMeasure;
     pub(crate) struct InstantiateWasmFunctionsMeasure;
@@ -112,7 +111,7 @@ mod v21 {
     pub(crate) struct InstantiateWasmElemSegmentsMeasure;
     pub(crate) struct InstantiateWasmImportsMeasure;
     pub(crate) struct InstantiateWasmExportsMeasure;
-    pub(crate) struct InstantiateWasmMemoryPagesMeasure;
+    pub(crate) struct InstantiateWasmDataSegmentBytesMeasure;
 
     // Protocol 21 coarse instantiation-phase cost model
     impl_measurement_for_instantiation_cost_type!(
@@ -188,11 +187,11 @@ mod v21 {
         30
     );
     impl_measurement_for_instantiation_cost_type!(
-        ParseWasmMemoryPagesRun,
-        ParseWasmMemoryPagesMeasure,
-        wasm_module_with_n_memory_pages,
+        ParseWasmDataSegmentBytesRun,
+        ParseWasmDataSegmentBytesMeasure,
+        wasm_module_with_n_data_segment_bytes,
         true,
-        30
+        200000
     );
 
     impl_measurement_for_instantiation_cost_type!(
@@ -259,10 +258,10 @@ mod v21 {
         30
     );
     impl_measurement_for_instantiation_cost_type!(
-        InstantiateWasmMemoryPagesRun,
-        InstantiateWasmMemoryPagesMeasure,
-        wasm_module_with_n_memory_pages,
+        InstantiateWasmDataSegmentBytesRun,
+        InstantiateWasmDataSegmentBytesMeasure,
+        wasm_module_with_n_data_segment_bytes,
         true,
-        30
+        200000
     );
 }
diff --git a/soroban-env-host/benches/common/cost_types/wasm_insn_exec.rs b/soroban-env-host/benches/common/cost_types/wasm_insn_exec.rs
@@ -14,22 +14,56 @@ struct WasmModule {
     overhead: u64,
 }
 
+// ModEmitter's default constructors are a little too spartan for our needs: we
+// want our benchmarks to all have at least one imported function and at least
+// one defined and exported function, so we're in the right performance tier.
+// But we also don't want to go changing those constructors since it'll perturb
+// a lot of non-benchmark users.
+trait ModEmitterExt {
+    fn bench_default() -> Self;
+    fn bench_from_configs(mem_pages: u32, elem_count: u32) -> Self;
+    fn add_bench_import(self) -> Self;
+    fn add_bench_export(self) -> Self;
+    fn add_bench_baseline_material(self) -> Self;
+}
+
+impl ModEmitterExt for ModEmitter {
+    fn add_bench_import(mut self) -> Self {
+        self.import_func("t", "_", Arity(0));
+        self
+    }
+    fn add_bench_export(self) -> Self {
+        let mut fe = self.func(Arity(0), 0);
+        fe.push(Symbol::try_from_small_str("pass").unwrap());
+        fe.finish_and_export("default")
+    }
+    fn add_bench_baseline_material(self) -> Self {
+        self.add_bench_import().add_bench_export()
+    }
+
+    fn bench_default() -> Self {
+        Self::add_bench_baseline_material(ModEmitter::default())
+    }
+
+    fn bench_from_configs(mem_pages: u32, elem_count: u32) -> Self {
+        Self::add_bench_baseline_material(ModEmitter::from_configs(mem_pages, elem_count))
+    }
+}
+
 pub fn wasm_module_with_n_internal_funcs(n: usize) -> Vec<u8> {
-    let mut me = ModEmitter::default();
+    let mut me = ModEmitter::bench_default();
     for _ in 0..n {
         let mut fe = me.func(Arity(0), 0);
         fe.push(Symbol::try_from_small_str("pass").unwrap());
         (me, _) = fe.finish();
     }
-    let mut fe = me.func(Arity(0), 0);
-    fe.push(Symbol::try_from_small_str("pass").unwrap());
-    fe.finish_and_export("test").finish()
+    me.finish()
 }
 
 pub fn wasm_module_with_n_insns(n: usize) -> Vec<u8> {
     // We actually emit 4 instructions per loop iteration, so we need to divide by 4.
     let n = 1 + (n / 4);
-    let mut fe = ModEmitter::default().func(Arity(1), 0);
+    let mut fe = ModEmitter::bench_default().func(Arity(1), 0);
     let arg = fe.args[0];
     fe.push(Operand::Const64(1));
     for i in 0..n {
@@ -43,31 +77,27 @@ pub fn wasm_module_with_n_insns(n: usize) -> Vec<u8> {
     fe.finish_and_export("test").finish()
 }
 pub fn wasm_module_with_n_globals(n: usize) -> Vec<u8> {
-    let mut me = ModEmitter::default();
+    let mut me = ModEmitter::bench_default();
     for i in 0..n {
         me.global(ValType::I64, true, &ConstExpr::i64_const(i as i64));
     }
-    let mut fe = me.func(Arity(0), 0);
-    fe.push(Symbol::try_from_small_str("pass").unwrap());
-    fe.finish_and_export("test").finish()
+    me.finish()
 }
 
 pub fn wasm_module_with_n_imports(n: usize) -> Vec<u8> {
-    let mut me = ModEmitter::default();
+    let mut me = ModEmitter::default().add_bench_import();
     let names = Vm::get_all_host_functions();
     for (module, name, arity) in names.iter().take(n) {
         if *module == "t" {
             continue;
         }
         me.import_func(module, name, Arity(*arity));
     }
-    let mut fe = me.func(Arity(0), 0);
-    fe.push(Symbol::try_from_small_str("pass").unwrap());
-    fe.finish_and_export("test").finish()
+    me.add_bench_export().finish()
 }
 
 pub fn wasm_module_with_n_exports(n: usize) -> Vec<u8> {
-    let me = ModEmitter::default();
+    let me = ModEmitter::bench_default();
     let mut fe = me.func(Arity(0), 0);
     fe.push(Symbol::try_from_small_str("pass").unwrap());
     let (mut me, fid) = fe.finish();
@@ -78,7 +108,7 @@ pub fn wasm_module_with_n_exports(n: usize) -> Vec<u8> {
 }
 
 pub fn wasm_module_with_n_table_entries(n: usize) -> Vec<u8> {
-    let me = ModEmitter::from_configs(1, n as u32);
+    let me = ModEmitter::bench_from_configs(1, n as u32);
     let mut fe = me.func(Arity(0), 0);
     fe.push(Symbol::try_from_small_str("pass").unwrap());
     let (mut me, f) = fe.finish();
@@ -88,7 +118,7 @@ pub fn wasm_module_with_n_table_entries(n: usize) -> Vec<u8> {
 }
 
 pub fn wasm_module_with_n_types(mut n: usize) -> Vec<u8> {
-    let mut me = ModEmitter::default();
+    let mut me = ModEmitter::bench_default();
     // There's a max of 1,000,000 types, so we just make a loop
     // that covers more than that many combinations, and break when we've got
     // to the requested number.
@@ -151,13 +181,11 @@ pub fn wasm_module_with_n_types(mut n: usize) -> Vec<u8> {
             }
         }
     }
-    let mut fe = me.func(Arity(0), 0);
-    fe.push(Symbol::try_from_small_str("pass").unwrap());
-    fe.finish_and_export("test").finish()
+    me.finish()
 }
 
 pub fn wasm_module_with_n_elem_segments(n: usize) -> Vec<u8> {
-    let me = ModEmitter::from_configs(1, n as u32);
+    let me = ModEmitter::bench_from_configs(1, n as u32);
     let mut fe = me.func(Arity(0), 0);
     fe.push(Symbol::try_from_small_str("pass").unwrap());
     let (mut me, f) = fe.finish();
@@ -169,22 +197,17 @@ pub fn wasm_module_with_n_elem_segments(n: usize) -> Vec<u8> {
 
 pub fn wasm_module_with_n_data_segments(n: usize) -> Vec<u8> {
     let mem_offset = n as u32 * 1024;
-    let me = ModEmitter::from_configs(1 + mem_offset / 65536, 0);
-    let mut fe = me.func(Arity(0), 0);
-    fe.push(Symbol::try_from_small_str("pass").unwrap());
-    let (mut me, _) = fe.finish();
+    let mut me = ModEmitter::bench_from_configs(1 + mem_offset / 65536, 0);
     for _ in 0..n {
         me.define_data_segment(n as u32 * 1024, vec![1, 2, 3, 4]);
     }
     me.finish()
 }
 
-pub fn wasm_module_with_n_memory_pages(n: usize) -> Vec<u8> {
-    let mut me = ModEmitter::from_configs(n as u32, 0);
-    me.define_data_segment(0, vec![0xff; n * 0x10000]);
-    let mut fe = me.func(Arity(0), 0);
-    fe.push(Symbol::try_from_small_str("pass").unwrap());
-    fe.finish_and_export("test").finish()
+pub fn wasm_module_with_n_data_segment_bytes(n: usize) -> Vec<u8> {
+    let mut me = ModEmitter::bench_from_configs(1 + (n / 0x10000) as u32, 0);
+    me.define_data_segment(0, vec![0xff; n]);
+    me.finish()
 }
 
 fn wasm_module_with_mem_grow(n_pages: usize) -> Vec<u8> {

diff --git a/soroban-env-host/benches/common/mod.rs b/soroban-env-host/benches/common/mod.rs
@@ -96,7 +96,7 @@ pub(crate) fn for_each_host_cost_measurement<B: Benchmark>(
         call_bench::<B, ParseWasmElemSegmentsMeasure>(&mut params)?;
         call_bench::<B, ParseWasmImportsMeasure>(&mut params)?;
         call_bench::<B, ParseWasmExportsMeasure>(&mut params)?;
-        call_bench::<B, ParseWasmMemoryPagesMeasure>(&mut params)?;
+        call_bench::<B, ParseWasmDataSegmentBytesMeasure>(&mut params)?;
 
         call_bench::<B, InstantiateWasmInstructionsMeasure>(&mut params)?;
         call_bench::<B, InstantiateWasmFunctionsMeasure>(&mut params)?;
@@ -107,7 +107,7 @@ pub(crate) fn for_each_host_cost_measurement<B: Benchmark>(
         call_bench::<B, InstantiateWasmElemSegmentsMeasure>(&mut params)?;
         call_bench::<B, InstantiateWasmImportsMeasure>(&mut params)?;
         call_bench::<B, InstantiateWasmExportsMeasure>(&mut params)?;
-        call_bench::<B, InstantiateWasmMemoryPagesMeasure>(&mut params)?;
+        call_bench::<B, InstantiateWasmDataSegmentBytesMeasure>(&mut params)?;
     }
     // These three mem ones are derived analytically; we do not typically calibrate them
     if std::env::var("INCLUDE_ANALYTICAL_COSTTYPES").is_ok() {

diff --git a/soroban-env-host/benches/worst_case_linear_models.rs b/soroban-env-host/benches/worst_case_linear_models.rs
@@ -68,6 +68,87 @@ fn write_cost_params_table<T: Display>(
     tw.flush()
 }
 
+fn correct_multi_variable_models(
+    params: &mut BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>,
+) {
+    // Several cost types actually represent additional terms in a cost model that
+    // we're decomposing into multiple variables, such as the cost of VM
+    // instantiation. When we charge these costs, we charge each variable
+    // separately, i.e. to charge a 5-variable cost we'll make 5 calls to the
+    // budget. Only the first of these 5 calls should have a constant factor,
+    // the rest should have zero as their constant (since they only contribute a
+    // new linear term), but the calibration code will have put the same (or
+    // nearly-the-same) nonzero constant term in each `CostComponent`. We
+    // correct this here by zeroing out the constant term in all but the first
+    // `CostComponent` of each set (and attempting to confirm that they all
+    // have roughly-the-same constant term).
+    use ContractCostType::*;
+    const MULTI_VARIABLE_COST_GROUPS: &[&[ContractCostType]] = &[
+        &[
+            ParseWasmInstructions,
+            ParseWasmFunctions,
+            ParseWasmGlobals,
+            ParseWasmTableEntries,
+            ParseWasmTypes,
+            ParseWasmDataSegments,
+            ParseWasmElemSegments,
+            ParseWasmImports,
+            ParseWasmExports,
+            ParseWasmDataSegmentBytes,
+        ],
+        &[
+            InstantiateWasmInstructions,
+            InstantiateWasmFunctions,
+            InstantiateWasmGlobals,
+            InstantiateWasmTableEntries,
+            InstantiateWasmTypes,
+            InstantiateWasmDataSegments,
+            InstantiateWasmElemSegments,
+            InstantiateWasmImports,
+            InstantiateWasmExports,
+            InstantiateWasmDataSegmentBytes,
+        ],
+    ];
+    for group in MULTI_VARIABLE_COST_GROUPS {
+        let mut iter = group.iter();
+        if let Some(first) = iter.next() {
+            let Some((first_cpu, first_mem)) = params.get(&CostType::Contract(*first)).cloned()
+            else {
+                continue;
+            };
+            for ty in iter {
+                let Some((cpu, mem)) = params.get_mut(&CostType::Contract(*ty)) else {
+                    continue;
+                };
+                let cpu_const_diff_ratio = (cpu.const_term as f64 - first_cpu.const_term as f64)
+                    / first_cpu.const_term as f64;
+                let mem_const_diff_ratio = (mem.const_term as f64 - first_mem.const_term as f64)
+                    / first_mem.const_term as f64;
+                assert!(
+                    cpu_const_diff_ratio < 0.25,
+                    "cost type {:?} has too large a constant CPU term over {:?}: {:?} vs. {:?} ({:?} diff)",
+                    ty,
+                    first,
+                    cpu.const_term,
+                    first_cpu.const_term,
+                    cpu_const_diff_ratio
+                );
+                assert!(
+                    mem_const_diff_ratio < 0.25,
+                    "cost type {:?} has too large a constant memory term over {:?}: {:?} vs. {:?} ({:?} diff)",
+                    ty,
+                    first,
+                    mem.const_term,
+                    first_mem.const_term,
+                    mem_const_diff_ratio
+                );
+                cpu.const_term = 0;
+                mem.const_term = 0;
+            }
+        }
+    }
+}
+
 fn write_budget_params_code(
     params: &BTreeMap<CostType, (MeteredCostComponent, MeteredCostComponent)>,
     wasm_tier_cost: &BTreeMap<WasmInsnTier, u64>,
@@ -335,12 +416,18 @@ fn extract_wasmi_fuel_costs(
 
 #[cfg(all(test, any(target_os = "linux", target_os = "macos")))]
 fn main() -> std::io::Result<()> {
-    let params = if std::env::var("RUN_EXPERIMENT").is_err() {
+    let mut params = if std::env::var("RUN_EXPERIMENT").is_err() {
         for_each_host_cost_measurement::<WorstCaseLinearModels>()?
     } else {
         for_each_experimental_cost_measurement::<WorstCaseLinearModels>()?
     };
-    let params_wasm = for_each_wasm_insn_measurement::<WorstCaseLinearModels>()?;
+    let params_wasm = if std::env::var("SKIP_WASM_INSNS").is_err() {
+        for_each_wasm_insn_measurement::<WorstCaseLinearModels>()?
+    } else {
+        BTreeMap::new()
+    };
+
+    correct_multi_variable_models(&mut params);
 
     let mut tw = TabWriter::new(vec![])
         .padding(5)