From 95b36c5f76ab25d0d03295453f1029789f5ebde1 Mon Sep 17 00:00:00 2001 From: TachyonicBytes Date: Sun, 26 Nov 2023 22:33:33 +0200 Subject: [PATCH] Better instantiation --- Cargo.lock | 150 +++++++++++-------------- dozer-sql/expression/src/builder.rs | 28 ++--- dozer-sql/expression/src/execution.rs | 9 +- dozer-sql/expression/src/wasm/error.rs | 2 + dozer-sql/expression/src/wasm/mod.rs | 34 ++++++ dozer-sql/expression/src/wasm/udf.rs | 38 +++---- dozer-sql/src/errors.rs | 2 - dozer-types/src/models/config.rs | 1 - dozer-types/src/models/udf_config.rs | 1 - json_schemas/dozer.json | 2 +- 10 files changed, 138 insertions(+), 129 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d7cde54c9b..9bd0276481 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1363,7 +1363,7 @@ dependencies = [ "cfg-if", "libc", "miniz_oxide", - "object", + "object 0.31.1", "rustc-demangle", ] @@ -1444,6 +1444,15 @@ dependencies = [ "serde", ] +[[package]] +name = "bincode" +version = "1.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" +dependencies = [ + "serde", +] + [[package]] name = "bincode" version = "2.0.0-rc.3" @@ -2943,15 +2952,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "debugid" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" -dependencies = [ - "uuid", -] - [[package]] name = "deltalake" version = "0.16.5" @@ -3837,7 +3837,7 @@ name = "dozer-cache" version = "0.3.0" dependencies = [ "ahash 0.8.3", - "bincode", + "bincode 2.0.0-rc.3", "clap 4.4.1", "criterion", "dozer-log", @@ -3901,7 +3901,7 @@ dependencies = [ name = "dozer-core" version = "0.3.0" dependencies = [ - "bincode", + "bincode 2.0.0-rc.3", "crossbeam", "daggy", "dozer-log", @@ -4106,7 +4106,7 @@ dependencies = [ "aws-sdk-s3", "aws-smithy-http", "aws-smithy-types", - "bincode", + "bincode 2.0.0-rc.3", "camino", "clap 4.4.1", "dozer-types", @@ -4143,7 +4143,7 @@ dependencies = [ name = "dozer-recordstore" version = "0.3.0" dependencies = [ - "bincode", + "bincode 2.0.0-rc.3", "dozer-storage", "dozer-types", "tempdir", @@ -4154,7 +4154,7 @@ name = "dozer-sql" version = "0.3.0" dependencies = [ "ahash 0.8.3", - "bincode", + "bincode 2.0.0-rc.3", "dozer-core", "dozer-recordstore", "dozer-sql-expression", @@ -4169,7 +4169,6 @@ dependencies = [ "regex", "tempdir", "tokio", - "wasmtime", ] [[package]] @@ -4178,7 +4177,7 @@ version = "0.3.0" dependencies = [ "async-recursion", "bigdecimal", - "bincode", + "bincode 2.0.0-rc.3", "dozer-deno", "dozer-types", "half 2.3.1", @@ -4190,7 +4189,6 @@ dependencies = [ "proptest", "sqlparser 0.35.0", "tokio", - "uuid", "wasmtime", ] @@ -4268,7 +4266,7 @@ dependencies = [ "arrow", "arrow-cast", "arrow-schema", - "bincode", + "bincode 2.0.0-rc.3", "bytes", "chrono", "geo", @@ -5081,6 +5079,15 @@ dependencies = [ "termcolor", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + [[package]] name = "fxprof-processed-profile" version = "0.6.0" @@ -5133,28 +5140,6 @@ dependencies = [ "typenum", ] -[[package]] -name = "fxhash" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" -dependencies = [ - "byteorder", -] - -[[package]] -name = "fxprof-processed-profile" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27d12c0aed7f1e24276a241aadc4cb8ea9f83000f34bc062b7cc2d51e3b0fabd" -dependencies = [ - "bitflags 2.3.3", - "debugid", - "fxhash", - "serde", - "serde_json", -] - [[package]] name = "generic-array" version = "0.14.6" @@ -5278,7 +5263,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c80984affa11d98d1b88b66ac8853f143217b399d3c74116778ff8fdb4ed2e" dependencies = [ "fallible-iterator", - "indexmap 1.9.2", + "indexmap 1.9.3", "stable_deref_trait", ] @@ -6036,9 +6021,9 @@ checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "ittapi" -version = "0.3.4" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41e0d0b7b3b53d92a7e8b80ede3400112a6b8b4c98d1f5b8b16bb787c780582c" +checksum = "25a5c0b993601cad796222ea076565c5d9f337d35592f8622c753724f06d7271" dependencies = [ "anyhow", "ittapi-sys", @@ -6047,9 +6032,9 @@ dependencies = [ [[package]] name = "ittapi-sys" -version = "0.3.4" +version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2f8763c96e54e6d6a0dccc2990d8b5e33e3313aaeae6185921a3f4c1614a77c" +checksum = "cb7b5e473765060536a660eed127f758cf1a810c73e49063264959c60d1727d9" dependencies = [ "cc", ] @@ -6708,6 +6693,15 @@ version = "2.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167" +[[package]] +name = "memfd" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2cffa4ad52c6f791f4f8b15f0c05f9824b2ced1160e88cc393d64fff9a8ac64" +dependencies = [ + "rustix 0.38.11", +] + [[package]] name = "memmap2" version = "0.5.10" @@ -6732,15 +6726,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "memfd" -version = "0.6.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2cffa4ad52c6f791f4f8b15f0c05f9824b2ced1160e88cc393d64fff9a8ac64" -dependencies = [ - "rustix 0.38.11", -] - [[package]] name = "memoffset" version = "0.8.0" @@ -6973,7 +6958,7 @@ dependencies = [ "base64 0.21.0", "bigdecimal", "bindgen", - "bitflags 2.3.3", + "bitflags 2.4.0", "bitvec 1.0.1", "byteorder", "bytes", @@ -7355,7 +7340,7 @@ checksum = "03b4680b86d9cfafba8fc491dc9b6df26b68cf40e9e6cd73909194759a63c385" dependencies = [ "crc32fast", "hashbrown 0.13.2", - "indexmap 1.9.2", + "indexmap 1.9.3", "memchr", ] @@ -9521,9 +9506,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.37.24" +version = "0.37.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4279d76516df406a8bd37e7dff53fd37d1a093f997a3c34a5c21658c126db06d" +checksum = "fea8ca367a3a01fe35e6943c400addf443c0f57670e6ec51196f71a4b8762dd2" dependencies = [ "bitflags 1.3.2", "errno 0.3.1", @@ -12283,9 +12268,9 @@ checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" [[package]] name = "wasm-encoder" -version = "0.33.2" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34180c89672b3e4825c3a8db4b61a674f1447afd5fe2445b2d22c3d8b6ea086c" +checksum = "7b09bc5df933a3dabbdb72ae4b6b71be8ae07f58774d5aa41bd20adcd41a235a" dependencies = [ "leb128", ] @@ -12309,7 +12294,7 @@ version = "0.103.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c437373cac5ea84f1113d648d51f71751ffbe3d90c00ae67618cf20d0b5ee7b" dependencies = [ - "indexmap 1.9.2", + "indexmap 1.9.3", "url", ] @@ -12321,11 +12306,11 @@ checksum = "634357e8668774b24c80b210552f3f194e2342a065d6d83845ba22c5817d0770" dependencies = [ "anyhow", "async-trait", - "bincode", + "bincode 1.3.3", "bumpalo", "cfg-if", "fxprof-processed-profile", - "indexmap 1.9.2", + "indexmap 1.9.3", "libc", "log", "object 0.30.4", @@ -12365,11 +12350,11 @@ checksum = "6107809b2d9f5b2fd3ddbaddb3bb92ff8048b62f4030debf1408119ffd38c6cb" dependencies = [ "anyhow", "base64 0.21.0", - "bincode", + "bincode 1.3.3", "directories-next", "file-per-thread-logger", "log", - "rustix 0.37.24", + "rustix 0.37.27", "serde", "sha2 0.10.6", "toml", @@ -12384,8 +12369,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ba489850d9c91c6c5b9e1696ee89e7a69d9796236a005f7e9131b6746e13b6" dependencies = [ "anyhow", - "proc-macro2", - "quote", + "proc-macro2 1.0.63", + "quote 1.0.30", "syn 1.0.109", "wasmtime-component-util", "wasmtime-wit-bindgen", @@ -12446,7 +12431,7 @@ dependencies = [ "anyhow", "cranelift-entity", "gimli", - "indexmap 1.9.2", + "indexmap 1.9.3", "log", "object 0.30.4", "serde", @@ -12464,7 +12449,7 @@ checksum = "23c5127908fdf720614891ec741c13dd70c844e102caa393e2faca1ee68e9bfb" dependencies = [ "cc", "cfg-if", - "rustix 0.37.24", + "rustix 0.37.27", "wasmtime-asm-macros", "windows-sys 0.48.0", ] @@ -12477,7 +12462,7 @@ checksum = "2712eafe829778b426cad0e1769fef944898923dd29f0039e34e0d53ba72b234" dependencies = [ "addr2line 0.19.0", "anyhow", - "bincode", + "bincode 1.3.3", "cfg-if", "cpp_demangle", "gimli", @@ -12502,7 +12487,7 @@ checksum = "65fb78eacf4a6e47260d8ef8cc81ea8ddb91397b2e848b3fb01567adebfe89b5" dependencies = [ "object 0.30.4", "once_cell", - "rustix 0.37.24", + "rustix 0.37.27", ] [[package]] @@ -12525,15 +12510,15 @@ dependencies = [ "anyhow", "cc", "cfg-if", - "indexmap 1.9.2", + "indexmap 1.9.3", "libc", "log", "mach", "memfd", - "memoffset", + "memoffset 0.8.0", "paste", "rand 0.8.5", - "rustix 0.37.24", + "rustix 0.37.27", "wasmtime-asm-macros", "wasmtime-environ", "wasmtime-fiber", @@ -12566,9 +12551,9 @@ dependencies = [ [[package]] name = "wast" -version = "66.0.0" +version = "69.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0da7529bb848d58ab8bf32230fc065b363baee2bd338d5e58c589a1e7d83ad07" +checksum = "efa51b5ad1391943d1bfad537e50f28fe938199ee76b115be6bae83802cd5185" dependencies = [ "leb128", "memchr", @@ -12578,9 +12563,9 @@ dependencies = [ [[package]] name = "wat" -version = "1.0.75" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4780374047c65b6b6e86019093fe80c18b66825eb684df778a4e068282a780e7" +checksum = "74a4c2488d058326466e086a43f5d4ea448241a8d0975e3eb0642c0828be1eb3" dependencies = [ "wast", ] @@ -12938,7 +12923,6 @@ dependencies = [ ] [[package]] -<<<<<<< HEAD name = "winres" version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -12948,8 +12932,6 @@ dependencies = [ ] [[package]] -======= ->>>>>>> 29ea6a9b (Bring wasm udf up to date) name = "wit-parser" version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -12957,10 +12939,10 @@ checksum = "5ca2581061573ef6d1754983d7a9b3ed5871ef859d52708ea9a0f5af32919172" dependencies = [ "anyhow", "id-arena", - "indexmap 1.9.2", + "indexmap 1.9.3", "log", "pulldown-cmark", - "unicode-xid", + "unicode-xid 0.2.4", "url", ] diff --git a/dozer-sql/expression/src/builder.rs b/dozer-sql/expression/src/builder.rs index f0aeca65cd..d59cb13f2c 100644 --- a/dozer-sql/expression/src/builder.rs +++ b/dozer-sql/expression/src/builder.rs @@ -40,7 +40,7 @@ use dozer_types::models::udf_config::OnnxConfig; #[cfg(feature = "wasm")] use dozer_types::models::udf_config::WasmConfig; -#[derive(Clone, PartialEq, Debug)] +#[derive(Clone, Debug)] pub struct ExpressionBuilder { // Must be an aggregation function pub aggregations: Vec, @@ -577,6 +577,7 @@ impl ExpressionBuilder { schema, udfs, ) + .await } #[cfg(not(feature = "wasm"))] @@ -1007,7 +1008,7 @@ impl ExpressionBuilder { } #[cfg(feature = "wasm")] - fn parse_wasm_udf( + async fn parse_wasm_udf( &mut self, name: String, config: &WasmConfig, @@ -1020,35 +1021,36 @@ impl ExpressionBuilder { use crate::wasm::utils::wasm_validate_input_and_return; use std::path::Path; - let args = function - .args - .iter() - .map(|argument| self.parse_sql_function_arg(false, argument, schema, udfs)) - .collect::, Error>>()?; + let mut args = vec![]; + for argument in &function.args { + let arg = self + .parse_sql_function_arg(false, argument, schema, udfs) + .await?; + args.push(arg); + } - let (value_types, return_type) = wasm_validate_input_and_return( + let session = wasm_validate_input_and_return( schema, name.as_str(), Path::new(&config.path.clone()), &args, ) .unwrap(); - let return_type = match return_type { + + let return_type = match session.return_type { wasmtime::ValType::I32 => FieldType::Int, wasmtime::ValType::I64 => FieldType::Int, wasmtime::ValType::F32 => FieldType::Float, wasmtime::ValType::F64 => FieldType::Float, - wasmtime::ValType::V128 => todo!(), - wasmtime::ValType::FuncRef => todo!(), - wasmtime::ValType::ExternRef => todo!(), + _ => todo!(), }; Ok(Expression::WasmUDF { name: name.to_string(), module: config.path.clone(), args, - value_types, return_type, + session, }) } diff --git a/dozer-sql/expression/src/execution.rs b/dozer-sql/expression/src/execution.rs index 685292b58f..2c234e9d4a 100644 --- a/dozer-sql/expression/src/execution.rs +++ b/dozer-sql/expression/src/execution.rs @@ -18,7 +18,8 @@ use dozer_types::types::Record; use dozer_types::types::{Field, FieldType, Schema, SourceDefinition}; #[cfg(feature = "wasm")] -use wasmtime::ValType; +use crate::wasm::WasmSession; + #[derive(Clone, Debug, PartialEq)] pub enum Expression { @@ -106,8 +107,8 @@ pub enum Expression { name: String, module: String, args: Vec, - value_types: Vec, return_type: FieldType, + session: WasmSession, }, } @@ -362,10 +363,10 @@ impl Expression { #[cfg(feature = "wasm")] Expression::WasmUDF { - name, module, args, .. + name: _name, module: _module, args, return_type: _, session } => { use crate::wasm::udf::evaluate_wasm_udf; - evaluate_wasm_udf(schema, name, module, args, record) + evaluate_wasm_udf(schema, args, record, session) } Expression::UnaryOperator { operator, arg } => operator.evaluate(schema, arg, record), Expression::AggregateFunction { fun, args: _ } => { diff --git a/dozer-sql/expression/src/wasm/error.rs b/dozer-sql/expression/src/wasm/error.rs index 71a452018e..8011e40214 100644 --- a/dozer-sql/expression/src/wasm/error.rs +++ b/dozer-sql/expression/src/wasm/error.rs @@ -31,4 +31,6 @@ pub enum Error { WasmInputTypeSizeMismatch(usize, usize), #[error("The WASM function {0} is missing from the module {1}")] WasmFunctionMissing(String, String), + #[error("Could not instantiate WASM module {0}")] + WasmInstantiateError(String), } diff --git a/dozer-sql/expression/src/wasm/mod.rs b/dozer-sql/expression/src/wasm/mod.rs index dba74fe81c..3b4be9c0ef 100644 --- a/dozer-sql/expression/src/wasm/mod.rs +++ b/dozer-sql/expression/src/wasm/mod.rs @@ -1,3 +1,37 @@ pub mod error; pub mod udf; pub mod utils; +use std::fmt; + +use wasmtime::*; + +#[derive(Clone)] +pub struct WasmSession { + /// Used just for printing errors + pub module_path: String, + pub function_name: String, + pub instance_pre: InstancePre<()>, + pub engine: Engine, + pub value_types: Vec, + pub return_type: ValType, +} + +/// Debug implementation for WasmSession. +/// `instance_pre` is omitted from this implementation. +impl fmt::Debug for WasmSession { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("WasmSession") + .field("module_path", &self.function_name) + .field("function_name", &self.function_name) + // Omit the debug values for `InstancePre` and `Engine` + .field("value_types", &self.value_types) + .field("return_type", &self.return_type) + .finish() + } +} + +impl PartialEq for WasmSession { + fn eq(&self, other: &Self) -> bool { + self.value_types == other.value_types && self.return_type == other.return_type + } +} diff --git a/dozer-sql/expression/src/wasm/udf.rs b/dozer-sql/expression/src/wasm/udf.rs index c4c648c74f..9029ff7dc3 100644 --- a/dozer-sql/expression/src/wasm/udf.rs +++ b/dozer-sql/expression/src/wasm/udf.rs @@ -1,4 +1,4 @@ -use super::error::Error::{WasmFunctionMissing, WasmTrap}; +use super::error::Error::{WasmTrap, WasmInstantiateError}; use crate::error::Error::{self, Wasm}; use dozer_types::ordered_float::OrderedFloat; use dozer_types::types::{Field, Record, Schema}; @@ -7,36 +7,27 @@ use wasmtime::*; use crate::execution::Expression; +use super::WasmSession; + pub fn evaluate_wasm_udf( schema: &Schema, - name: &str, - config: &str, - args: &[Expression], + args: &mut [Expression], record: &Record, + session: &WasmSession, ) -> Result { let input_values = args - .iter() + .iter_mut() .map(|arg| arg.evaluate(record, schema)) .collect::, Error>>()?; - let engine = Engine::default(); - let module = Module::from_file(&engine, config).unwrap(); - let mut store = Store::new(&engine, ()); - let instance = Instance::new(&mut store, &module, &[]).unwrap(); - - let wasm_udf_func; - match instance.get_func(&mut store, name) { - Some(func) => { - wasm_udf_func = func; - } - None => { - return Err(Wasm(WasmFunctionMissing( - name.to_string(), - config.to_string(), - ))); - } - } + // Instantiate again, because we cannot pass as `Store` in the WasmSession struct + let mut store = Store::new(&session.engine, ()); + let instance = session.instance_pre.instantiate(&mut store) + .map_err(|_| (WasmInstantiateError(session.module_path.clone())))?; + // Type checking already checked the name of the function + // Get the Func, FuncType, inputs and output of the wasm function + let wasm_udf_func = instance.get_func(&mut store, session.function_name.as_str()).unwrap(); let func_type = wasm_udf_func.ty(&mut store); let param_types = func_type.params(); let mut result_type = func_type.results(); @@ -78,13 +69,14 @@ pub fn evaluate_wasm_udf( }) .collect(); + // Type checking verified this let result = result_type.next().unwrap(); let mut results: [Val; 1] = [Val::I64(0)]; match wasm_udf_func.call(&mut store, &values, &mut results) { Ok(()) => {} Err(trap) => { - return Err(Wasm(WasmTrap(name.to_string(), trap.to_string()))); + return Err(Wasm(WasmTrap(session.function_name.clone(), trap.to_string()))); } } diff --git a/dozer-sql/src/errors.rs b/dozer-sql/src/errors.rs index d75e921b7e..16c61da663 100644 --- a/dozer-sql/src/errors.rs +++ b/dozer-sql/src/errors.rs @@ -12,8 +12,6 @@ use dozer_types::thiserror::Error; use dozer_types::types::{Field, FieldType}; use std::fmt::{Display, Formatter}; -use super::utils::serialize::DeserializationError; - #[derive(Debug, Clone)] pub struct FieldTypes { types: Vec, diff --git a/dozer-types/src/models/config.rs b/dozer-types/src/models/config.rs index 6a1d3b3cca..e6ff9ab5a0 100644 --- a/dozer-types/src/models/config.rs +++ b/dozer-types/src/models/config.rs @@ -68,7 +68,6 @@ pub struct Config { /// Dozer Cloud specific configuration pub cloud: Cloud, - #[prost(message, repeated, tag = "15")] /// UDF specific configuration (eg. !Onnx, Wasm) #[serde(default, skip_serializing_if = "Vec::is_empty")] pub udfs: Vec, diff --git a/dozer-types/src/models/udf_config.rs b/dozer-types/src/models/udf_config.rs index b824e5e233..0528573af1 100644 --- a/dozer-types/src/models/udf_config.rs +++ b/dozer-types/src/models/udf_config.rs @@ -35,7 +35,6 @@ pub struct JavaScriptConfig { #[derive(Debug, Serialize, Deserialize, JsonSchema, Eq, PartialEq, Clone)] pub struct WasmConfig { - #[prost(string)] /// path to the module file pub path: String, } diff --git a/json_schemas/dozer.json b/json_schemas/dozer.json index 7e650f3256..1ef8a22402 100644 --- a/json_schemas/dozer.json +++ b/json_schemas/dozer.json @@ -111,7 +111,7 @@ ] }, "udfs": { - "description": "UDF specific configuration (eg. !Onnx, !Wasm)", + "description": "UDF specific configuration (eg. !Onnx, Wasm)", "type": "array", "items": { "$ref": "#/definitions/UdfConfig"