From 2887bb91635e1a746bfc803a37bbce59f3e097ff Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 26 Nov 2024 22:32:23 +0000 Subject: [PATCH 01/22] Do not normalize values --- datafusion/common/src/config.rs | 2 +- datafusion/sql/src/planner.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 1ad10d164868..39664af7f208 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -211,7 +211,7 @@ config_namespace! { pub enable_ident_normalization: bool, default = true /// When set to true, SQL parser will normalize options value (convert value to lowercase) - pub enable_options_value_normalization: bool, default = true + pub enable_options_value_normalization: bool, default = false /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index ccb2ccf7126f..51a71906ae4f 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -56,7 +56,7 @@ impl Default for ParserOptions { parse_float_as_decimal: false, enable_ident_normalization: true, support_varchar_with_length: true, - enable_options_value_normalization: true, + enable_options_value_normalization: false, } } } From 813d6345815440b1d9a94a913e172be537ea8378 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 26 Nov 2024 23:25:45 +0000 Subject: [PATCH 02/22] Fix tests & update docs --- datafusion/sqllogictest/test_files/information_schema.slt | 4 ++-- docs/source/user-guide/configs.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/sqllogictest/test_files/information_schema.slt b/datafusion/sqllogictest/test_files/information_schema.slt index 4d51a61c8a52..ce348570c530 100644 --- a/datafusion/sqllogictest/test_files/information_schema.slt +++ b/datafusion/sqllogictest/test_files/information_schema.slt @@ -258,7 +258,7 @@ datafusion.optimizer.skip_failed_rules false datafusion.optimizer.top_down_join_key_reordering true datafusion.sql_parser.dialect generic datafusion.sql_parser.enable_ident_normalization true -datafusion.sql_parser.enable_options_value_normalization true +datafusion.sql_parser.enable_options_value_normalization false datafusion.sql_parser.parse_float_as_decimal false datafusion.sql_parser.support_varchar_with_length true @@ -351,7 +351,7 @@ datafusion.optimizer.skip_failed_rules false When set to true, the logical plan datafusion.optimizer.top_down_join_key_reordering true When set to true, the physical plan optimizer will run a top down process to reorder the join keys datafusion.sql_parser.dialect generic Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. datafusion.sql_parser.enable_ident_normalization true When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) -datafusion.sql_parser.enable_options_value_normalization true When set to true, SQL parser will normalize options value (convert value to lowercase) +datafusion.sql_parser.enable_options_value_normalization false When set to true, SQL parser will normalize options value (convert value to lowercase) datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL parser will parse float as decimal type datafusion.sql_parser.support_varchar_with_length true If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 6a49fda668a9..a34e1360d6cc 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -36,7 +36,7 @@ If the value in the environment variable cannot be cast to the type of the confi Environment variables are read during `SessionConfig` initialisation so they must be set beforehand and will not affect running sessions. | key | default | description | -| ----------------------------------------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ----------------------------------------------------------------------- |---------------------------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | datafusion.catalog.create_default_catalog_and_schema | true | Whether the default catalog and schema should be created automatically. | | datafusion.catalog.default_catalog | datafusion | The default catalog name - this impacts what SQL queries use if not specified | | datafusion.catalog.default_schema | public | The default schema name - this impacts what SQL queries use if not specified | @@ -122,6 +122,6 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.explain.show_schema | false | When set to true, the explain statement will print schema information | | datafusion.sql_parser.parse_float_as_decimal | false | When set to true, SQL parser will parse float as decimal type | | datafusion.sql_parser.enable_ident_normalization | true | When set to true, SQL parser will normalize ident (convert ident to lowercase when not quoted) | -| datafusion.sql_parser.enable_options_value_normalization | true | When set to true, SQL parser will normalize options value (convert value to lowercase) | +| datafusion.sql_parser.enable_options_value_normalization | false | When set to true, SQL parser will normalize options value (convert value to lowercase) | | datafusion.sql_parser.dialect | generic | Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. | | datafusion.sql_parser.support_varchar_with_length | true | If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a length is specified. The Arrow type system does not have a notion of maximum string length and thus DataFusion can not enforce such limits. | From c3de620c86643e1cf0c2b2b7d06e77df29e9083e Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 26 Nov 2024 23:29:59 +0000 Subject: [PATCH 03/22] Prettier --- docs/source/user-guide/configs.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index a34e1360d6cc..304b0efe5b65 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -36,7 +36,7 @@ If the value in the environment variable cannot be cast to the type of the confi Environment variables are read during `SessionConfig` initialisation so they must be set beforehand and will not affect running sessions. | key | default | description | -| ----------------------------------------------------------------------- |---------------------------| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| ----------------------------------------------------------------------- | ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | datafusion.catalog.create_default_catalog_and_schema | true | Whether the default catalog and schema should be created automatically. | | datafusion.catalog.default_catalog | datafusion | The default catalog name - this impacts what SQL queries use if not specified | | datafusion.catalog.default_schema | public | The default schema name - this impacts what SQL queries use if not specified | From 7c2b3fe7536278df4c9d4d89928020054bdf00d9 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 3 Dec 2024 18:01:21 +0000 Subject: [PATCH 04/22] Lowercase config params --- datafusion-cli/src/object_storage.rs | 9 ++-- datafusion/common/src/config.rs | 43 +++++++++++++------ datafusion/core/src/datasource/stream.rs | 2 +- datafusion/core/tests/config_from_env.rs | 17 ++++++-- .../test_files/create_external_table.slt | 14 ++++++ .../sqllogictest/test_files/set_variable.slt | 8 ++-- 6 files changed, 66 insertions(+), 27 deletions(-) diff --git a/datafusion-cli/src/object_storage.rs b/datafusion-cli/src/object_storage.rs index 3d999766e03f..a48f7aaa6f0b 100644 --- a/datafusion-cli/src/object_storage.rs +++ b/datafusion-cli/src/object_storage.rs @@ -471,12 +471,13 @@ mod tests { #[tokio::test] async fn s3_object_store_builder() -> Result<()> { - let access_key_id = "fake_access_key_id"; - let secret_access_key = "fake_secret_access_key"; + // "fake" is uppercase to ensure the values are not lowercased when parsed + let access_key_id = "FAKE_access_key_id"; + let secret_access_key = "FAKE_secret_access_key"; let region = "fake_us-east-2"; let endpoint = "endpoint33"; - let session_token = "fake_session_token"; - let location = "s3://bucket/path/file.parquet"; + let session_token = "FAKE_session_token"; + let location = "s3://bucket/path/FAKE/file.parquet"; let table_url = ListingTableUrl::parse(location)?; let scheme = table_url.scheme(); diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 2c9375896446..fa073c939f6d 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -38,7 +38,7 @@ use crate::{DataFusionError, Result}; /// /// Amazing config /// pub struct MyConfig { /// /// Field 1 doc -/// field1: String, default = "".to_string() +/// field1: String, transform = str::to_lowercase, default = "".to_string() /// /// /// Field 2 doc /// field2: usize, default = 232 @@ -67,9 +67,12 @@ use crate::{DataFusionError, Result}; /// fn set(&mut self, key: &str, value: &str) -> Result<()> { /// let (key, rem) = key.split_once('.').unwrap_or((key, "")); /// match key { -/// "field1" => self.field1.set(rem, value), -/// "field2" => self.field2.set(rem, value), -/// "field3" => self.field3.set(rem, value), +/// "field1" => { +/// let value = str::to_lowercase(value); +/// self.field1.set(rem, value.as_ref()) +/// }, +/// "field2" => self.field2.set(rem, value.as_ref()), +/// "field3" => self.field3.set(rem, value.as_ref()), /// _ => _internal_err!( /// "Config value \"{}\" not found on MyConfig", /// key @@ -102,7 +105,6 @@ use crate::{DataFusionError, Result}; /// ``` /// /// NB: Misplaced commas may result in nonsensical errors -/// #[macro_export] macro_rules! config_namespace { ( @@ -110,7 +112,7 @@ macro_rules! config_namespace { $vis:vis struct $struct_name:ident { $( $(#[doc = $d:tt])* - $field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr + $field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr )*$(,)* } ) => { @@ -127,9 +129,13 @@ macro_rules! config_namespace { impl ConfigField for $struct_name { fn set(&mut self, key: &str, value: &str) -> Result<()> { let (key, rem) = key.split_once('.').unwrap_or((key, "")); + match key { $( - stringify!($field_name) => self.$field_name.set(rem, value), + stringify!($field_name) => { + $(let value = $transform(value);)? + self.$field_name.set(rem, value.as_ref()) + }, )* _ => return _config_err!( "Config value \"{}\" not found on {}", key, stringify!($struct_name) @@ -216,7 +222,8 @@ config_namespace! { /// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic, /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi. - pub dialect: String, default = "generic".to_string() + pub dialect: String, default = "generic".to_string() // no need to lowercase because + // [`sqlparser::dialect_from_str`] is case-insensitive /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but /// ignore the length. If false, error if a `VARCHAR` with a length is @@ -431,7 +438,7 @@ config_namespace! { /// /// Note that this default setting is not the same as /// the default parquet writer setting. - pub compression: Option, default = Some("zstd(3)".into()) + pub compression: Option, transform = str::to_lowercase, default = Some("zstd(3)".into()) /// (writing) Sets if dictionary encoding is enabled. If NULL, uses /// default parquet writer setting @@ -444,7 +451,7 @@ config_namespace! { /// Valid values are: "none", "chunk", and "page" /// These values are not case sensitive. If NULL, uses /// default parquet writer setting - pub statistics_enabled: Option, default = Some("page".into()) + pub statistics_enabled: Option, transform = str::to_lowercase, default = Some("page".into()) /// (writing) Sets max statistics size for any column. If NULL, uses /// default parquet writer setting @@ -470,7 +477,7 @@ config_namespace! { /// delta_byte_array, rle_dictionary, and byte_stream_split. /// These values are not case sensitive. If NULL, uses /// default parquet writer setting - pub encoding: Option, default = None + pub encoding: Option, transform = str::to_lowercase, default = None /// (writing) Use any available bloom filters when reading parquet files pub bloom_filter_on_read: bool, default = true @@ -973,16 +980,24 @@ impl ConfigField for Option { #[macro_export] macro_rules! config_field { - ($t:ty) => { + ($t:ty $(, $transform:expr)?) => { impl ConfigField for $t { fn visit(&self, v: &mut V, key: &str, description: &'static str) { v.some(key, self, description) } fn set(&mut self, _: &str, value: &str) -> Result<()> { + $( + let value = $transform(&value); + )? + *self = value.parse().map_err(|e| { DataFusionError::Context( - format!(concat!("Error parsing {} as ", stringify!($t),), value), + format!( + "Error parsing '{}' as {}", + value, + stringify!($t), + ), Box::new(DataFusionError::External(Box::new(e))), ) })?; @@ -993,7 +1008,7 @@ macro_rules! config_field { } config_field!(String); -config_field!(bool); +config_field!(bool, str::to_lowercase); config_field!(usize); config_field!(f64); config_field!(u64); diff --git a/datafusion/core/src/datasource/stream.rs b/datafusion/core/src/datasource/stream.rs index d8fad5b6cd37..2cea37fe17e2 100644 --- a/datafusion/core/src/datasource/stream.rs +++ b/datafusion/core/src/datasource/stream.rs @@ -62,7 +62,7 @@ impl TableProviderFactory for StreamTableFactory { let header = if let Ok(opt) = cmd .options .get("format.has_header") - .map(|has_header| bool::from_str(has_header)) + .map(|has_header| bool::from_str(has_header.to_lowercase().as_str())) .transpose() { opt.unwrap_or(false) diff --git a/datafusion/core/tests/config_from_env.rs b/datafusion/core/tests/config_from_env.rs index a5a5a4524e60..76bde4a37d6d 100644 --- a/datafusion/core/tests/config_from_env.rs +++ b/datafusion/core/tests/config_from_env.rs @@ -22,10 +22,19 @@ use std::env; fn from_env() { // Note: these must be a single test to avoid interference from concurrent execution let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS"; - env::set_var(env_key, "true"); - let config = ConfigOptions::from_env().unwrap(); + // valid testing in different cases + for bool_option in ["true", "TRUE", "True"] { + env::set_var(env_key, bool_option); + let config = ConfigOptions::from_env().unwrap(); + env::remove_var(env_key); + assert!(config.optimizer.filter_null_join_keys); + } + + // invalid testing + env::set_var(env_key, "ttruee"); + let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); + assert_eq!(err, "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`"); env::remove_var(env_key); - assert!(config.optimizer.filter_null_join_keys); let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE"; @@ -37,7 +46,7 @@ fn from_env() { // for invalid testing env::set_var(env_key, "abc"); let err = ConfigOptions::from_env().unwrap_err().strip_backtrace(); - assert_eq!(err, "Error parsing abc as usize\ncaused by\nExternal error: invalid digit found in string"); + assert_eq!(err, "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string"); env::remove_var(env_key); let config = ConfigOptions::from_env().unwrap(); diff --git a/datafusion/sqllogictest/test_files/create_external_table.slt b/datafusion/sqllogictest/test_files/create_external_table.slt index ed001cf9f84c..6a63ea1cd3e4 100644 --- a/datafusion/sqllogictest/test_files/create_external_table.slt +++ b/datafusion/sqllogictest/test_files/create_external_table.slt @@ -226,6 +226,20 @@ OPTIONS ( has_header false, compression gzip); +# Verify that some options are case insensitive +statement ok +CREATE EXTERNAL TABLE IF NOT EXISTS region ( + r_regionkey BIGINT, + r_name VARCHAR, + r_comment VARCHAR, + r_rev VARCHAR, +) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl' +OPTIONS ( + format.delimiter '|', + has_header FALSE, + compression GZIP); + + # Create an external parquet table and infer schema to order by # query should succeed diff --git a/datafusion/sqllogictest/test_files/set_variable.slt b/datafusion/sqllogictest/test_files/set_variable.slt index 6f19c9f4d42f..bb4ac920d032 100644 --- a/datafusion/sqllogictest/test_files/set_variable.slt +++ b/datafusion/sqllogictest/test_files/set_variable.slt @@ -93,10 +93,10 @@ datafusion.execution.coalesce_batches false statement ok set datafusion.catalog.information_schema = true -statement error DataFusion error: Error parsing 1 as bool +statement error DataFusion error: Error parsing '1' as bool SET datafusion.execution.coalesce_batches to 1 -statement error DataFusion error: Error parsing abc as bool +statement error DataFusion error: Error parsing 'abc' as bool SET datafusion.execution.coalesce_batches to abc # set u64 variable @@ -132,10 +132,10 @@ datafusion.execution.batch_size 2 statement ok set datafusion.catalog.information_schema = true -statement error DataFusion error: Error parsing -1 as usize +statement error DataFusion error: Error parsing '-1' as usize SET datafusion.execution.batch_size to -1 -statement error DataFusion error: Error parsing abc as usize +statement error DataFusion error: Error parsing 'abc' as usize SET datafusion.execution.batch_size to abc statement error External error: invalid digit found in string From 9146e4b7c0dda51a226259038394c006479e6b91 Mon Sep 17 00:00:00 2001 From: blaginin Date: Thu, 5 Dec 2024 22:33:01 +0000 Subject: [PATCH 05/22] Add snap to CLI & set up AWS mock --- datafusion-cli/Cargo.lock | 72 +++++++++++++++++++ datafusion-cli/Cargo.toml | 2 + datafusion-cli/tests/integration_setup.bash | 16 +++++ datafusion-cli/tests/snaphots.rs | 29 ++++++++ ...ts__load_local_csv@load_local_csv.sql.snap | 26 +++++++ ...snaphots__load_s3_csv@load_s3_csv.sql.snap | 26 +++++++ datafusion-cli/tests/sql/load_local_csv.sql | 6 ++ datafusion-cli/tests/sql/load_s3_csv.sql | 12 ++++ 8 files changed, 189 insertions(+) create mode 100644 datafusion-cli/tests/integration_setup.bash create mode 100644 datafusion-cli/tests/snaphots.rs create mode 100644 datafusion-cli/tests/snapshots/snaphots__load_local_csv@load_local_csv.sql.snap create mode 100644 datafusion-cli/tests/snapshots/snaphots__load_s3_csv@load_s3_csv.sql.snap create mode 100644 datafusion-cli/tests/sql/load_local_csv.sql create mode 100644 datafusion-cli/tests/sql/load_s3_csv.sql diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index c871b2fdda08..9155c2fb8e67 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1053,6 +1053,18 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "console" +version = "0.15.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e1f83fc076bd6dd27517eacdf25fef6c4dfe5f1d7448bafaaf3a26f13b5e4eb" +dependencies = [ + "encode_unicode", + "lazy_static", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "const-random" version = "0.1.18" @@ -1289,6 +1301,8 @@ dependencies = [ "dirs", "env_logger", "futures", + "insta", + "insta-cmd", "mimalloc", "object_store", "parking_lot", @@ -1709,6 +1723,12 @@ version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "encode_unicode" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" + [[package]] name = "endian-type" version = "0.1.2" @@ -1957,6 +1977,19 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "globset" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + [[package]] name = "h2" version = "0.3.26" @@ -2401,6 +2434,33 @@ dependencies = [ "hashbrown 0.15.2", ] +[[package]] +name = "insta" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9ffc4d4892617c50a928c52b2961cb5174b6fc6ebf252b2fac9d21955c48b8" +dependencies = [ + "console", + "globset", + "lazy_static", + "linked-hash-map", + "regex", + "serde", + "similar", + "walkdir", +] + +[[package]] +name = "insta-cmd" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffeeefa927925cced49ccb01bf3e57c9d4cd132df21e576eb9415baeab2d3de6" +dependencies = [ + "insta", + "serde", + "serde_json", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -2579,6 +2639,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -3688,6 +3754,12 @@ dependencies = [ "libc", ] +[[package]] +name = "similar" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" + [[package]] name = "siphasher" version = "0.3.11" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index be2bdbc11114..f6851562eb2d 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -67,3 +67,5 @@ assert_cmd = "2.0" ctor = "0.2.0" predicates = "3.0" rstest = "0.22" +insta = { version = "1.41.1", features = ["glob", "filters"] } +insta-cmd = "0.6.0" diff --git a/datafusion-cli/tests/integration_setup.bash b/datafusion-cli/tests/integration_setup.bash new file mode 100644 index 000000000000..e1dc9eb0e86d --- /dev/null +++ b/datafusion-cli/tests/integration_setup.bash @@ -0,0 +1,16 @@ +# you should have localstack up, e.g by +#$ LOCALSTACK_VERSION=sha256:a0b79cb2430f1818de2c66ce89d41bba40f5a1823410f5a7eaf3494b692eed97 +#$ podman run -d -p 4566:4566 localstack/localstack@$LOCALSTACK_VERSION +#$ podman run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2 + +export TEST_INTEGRATION=1 +export AWS_DEFAULT_REGION=us-east-1 +export AWS_ACCESS_KEY_ID=test +export AWS_SECRET_ACCESS_KEY=test +export AWS_ENDPOINT=http://localhost:4566 +export AWS_ALLOW_HTTP=true +export AWS_BUCKET_NAME=test-bucket + + +aws s3 mb s3://test-bucket --endpoint-url=$AWS_ENDPOINT +aws s3 cp ../datafusion/core/tests/data/cars.csv s3://test-bucket/cars.csv --endpoint-url=$AWS_ENDPOINT diff --git a/datafusion-cli/tests/snaphots.rs b/datafusion-cli/tests/snaphots.rs new file mode 100644 index 000000000000..0bc9de36ad28 --- /dev/null +++ b/datafusion-cli/tests/snaphots.rs @@ -0,0 +1,29 @@ +use insta::glob; +use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; +use std::process::Command; +use std::{env, fs}; + +fn cli() -> Command { + Command::new(get_cargo_bin("datafusion-cli")) +} + +#[test] +fn test_snapshots() { + if env::var("TEST_INTEGRATION").is_err() { + eprintln!("Skipping integration tests"); + return; + } + + let mut settings = insta::Settings::clone_current(); + settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); + settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); + + let _bound = settings.bind_to_scope(); + + glob!("sql/*.sql", |path| { + let input = fs::read_to_string(path).unwrap(); + let file = path.file_stem().unwrap().to_string_lossy(); + + assert_cmd_snapshot!(file.as_ref(), cli().pass_stdin(input)) + }); +} diff --git a/datafusion-cli/tests/snapshots/snaphots__load_local_csv@load_local_csv.sql.snap b/datafusion-cli/tests/snapshots/snaphots__load_local_csv@load_local_csv.sql.snap new file mode 100644 index 000000000000..0a6c4c022d85 --- /dev/null +++ b/datafusion-cli/tests/snapshots/snaphots__load_local_csv@load_local_csv.sql.snap @@ -0,0 +1,26 @@ +--- +source: tests/snaphots.rs +info: + program: datafusion-cli + args: [] + stdin: "CREATE EXTERNAL TABLE cars\nSTORED AS CSV\nLOCATION '../datafusion/core/tests/data/cars.csv'\nOPTIONS ('has_header' 'TRUE');\n\nselect * from cars limit 1;" +input_file: tests/sql/load_local_csv.sql +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] +0 row(s) fetched. +[ELAPSED] + ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/snaphots__load_s3_csv@load_s3_csv.sql.snap b/datafusion-cli/tests/snapshots/snaphots__load_s3_csv@load_s3_csv.sql.snap new file mode 100644 index 000000000000..a60d958d77bf --- /dev/null +++ b/datafusion-cli/tests/snapshots/snaphots__load_s3_csv@load_s3_csv.sql.snap @@ -0,0 +1,26 @@ +--- +source: tests/snaphots.rs +info: + program: datafusion-cli + args: [] + stdin: "CREATE EXTERNAL TABLE cars\nSTORED AS CSV\nOPTIONS(\n 'aws.access_key_id' 'test',\n 'aws.secret_access_key' 'test',\n 'aws.region' 'us-east-1',\n 'aws.endpoint' 'http://localhost:4566',\n 'aws.allow_http' 'true'\n)\nLOCATION 's3://test-bucket/cars.csv';\n\nselect * from cars limit 1;" +input_file: tests/sql/load_s3_csv.sql +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] +0 row(s) fetched. +[ELAPSED] + ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/sql/load_local_csv.sql b/datafusion-cli/tests/sql/load_local_csv.sql new file mode 100644 index 000000000000..00147a5881c8 --- /dev/null +++ b/datafusion-cli/tests/sql/load_local_csv.sql @@ -0,0 +1,6 @@ +CREATE EXTERNAL TABLE cars +STORED AS CSV +LOCATION '../datafusion/core/tests/data/cars.csv' +OPTIONS ('has_header' 'TRUE'); + +select * from cars limit 1; \ No newline at end of file diff --git a/datafusion-cli/tests/sql/load_s3_csv.sql b/datafusion-cli/tests/sql/load_s3_csv.sql new file mode 100644 index 000000000000..57c51ad71b4f --- /dev/null +++ b/datafusion-cli/tests/sql/load_s3_csv.sql @@ -0,0 +1,12 @@ +CREATE EXTERNAL TABLE cars +STORED AS CSV +OPTIONS( + 'aws.access_key_id' 'test', + 'aws.secret_access_key' 'test', + 'aws.region' 'us-east-1', + 'aws.endpoint' 'http://localhost:4566', + 'aws.allow_http' 'true' +) +LOCATION 's3://test-bucket/cars.csv'; + +select * from cars limit 1; \ No newline at end of file From 9d856c325c48e97db57655a470c4f4ed0cdfd812 Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 6 Dec 2024 16:47:00 +0000 Subject: [PATCH 06/22] Refactor tests --- datafusion-cli/tests/cli_integration.rs | 82 +++++++++++++++---- datafusion-cli/tests/snaphots.rs | 29 ------- .../snapshots/cli_format_test@automatic.snap | 21 +++++ .../tests/snapshots/cli_format_test@csv.snap | 18 ++++ .../tests/snapshots/cli_format_test@json.snap | 17 ++++ .../snapshots/cli_format_test@nd-json.snap | 17 ++++ .../snapshots/cli_format_test@table.snap | 21 +++++ .../tests/snapshots/cli_format_test@tsv.snap | 18 ++++ .../snapshots/cli_quick_test@batch_size.snap | 21 +++++ .../tests/snapshots/cli_quick_test@files.snap | 19 +++++ .../snapshots/cli_quick_test@statements.snap | 24 ++++++ ...orage_integration@load_local_csv.sql.snap} | 4 +- ..._storage_integration@load_s3_csv.sql.snap} | 4 +- .../test_storage_integration@select.sql.snap | 23 ++++++ datafusion-cli/tests/sql/load_local_csv.sql | 4 +- datafusion-cli/tests/sql/load_s3_csv.sql | 4 +- .../tests/{data/sql.txt => sql/select.sql} | 0 17 files changed, 272 insertions(+), 54 deletions(-) delete mode 100644 datafusion-cli/tests/snaphots.rs create mode 100644 datafusion-cli/tests/snapshots/cli_format_test@automatic.snap create mode 100644 datafusion-cli/tests/snapshots/cli_format_test@csv.snap create mode 100644 datafusion-cli/tests/snapshots/cli_format_test@json.snap create mode 100644 datafusion-cli/tests/snapshots/cli_format_test@nd-json.snap create mode 100644 datafusion-cli/tests/snapshots/cli_format_test@table.snap create mode 100644 datafusion-cli/tests/snapshots/cli_format_test@tsv.snap create mode 100644 datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap create mode 100644 datafusion-cli/tests/snapshots/cli_quick_test@files.snap create mode 100644 datafusion-cli/tests/snapshots/cli_quick_test@statements.snap rename datafusion-cli/tests/snapshots/{snaphots__load_local_csv@load_local_csv.sql.snap => test_storage_integration@load_local_csv.sql.snap} (78%) rename datafusion-cli/tests/snapshots/{snaphots__load_s3_csv@load_s3_csv.sql.snap => test_storage_integration@load_s3_csv.sql.snap} (80%) create mode 100644 datafusion-cli/tests/snapshots/test_storage_integration@select.sql.snap rename datafusion-cli/tests/{data/sql.txt => sql/select.sql} (100%) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 27cabf15afec..d90156a79e5f 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -17,10 +17,22 @@ use std::process::Command; -use assert_cmd::prelude::{CommandCargoExt, OutputAssertExt}; -use predicates::prelude::predicate; use rstest::rstest; +use insta::{glob, Settings}; +use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; +use std::{env, fs}; + +fn cli() -> Command { + Command::new(get_cargo_bin("datafusion-cli")) +} + +fn make_settings() -> Settings { + let mut settings = Settings::clone_current(); + settings.set_prepend_module_to_snapshot(false); + settings +} + #[cfg(test)] #[ctor::ctor] fn init() { @@ -28,31 +40,67 @@ fn init() { let _ = env_logger::try_init(); } -// Disabled due to https://github.com/apache/datafusion/issues/10793 -#[cfg(not(target_family = "windows"))] #[rstest] -#[case::exec_from_commands( - ["--command", "select 1", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n" -)] #[case::exec_multiple_statements( - ["--command", "select 1; select 2;", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n[{\"Int64(2)\":2}]\n" + "statements", + ["--command", "select 1; select 2;", "-q"], )] #[case::exec_from_files( - ["--file", "tests/data/sql.txt", "--format", "json", "-q"], - "[{\"Int64(1)\":1}]\n" + "files", + ["--file", "tests/sql/select.sql", "-q"], )] #[case::set_batch_size( - ["--command", "show datafusion.execution.batch_size", "--format", "json", "-q", "-b", "1"], - "[{\"name\":\"datafusion.execution.batch_size\",\"value\":\"1\"}]\n" + "batch_size", + ["--command", "show datafusion.execution.batch_size", "-q", "-b", "1"], )] #[test] fn cli_quick_test<'a>( + #[case] snapshot_name: &'a str, #[case] args: impl IntoIterator, - #[case] expected: &str, ) { - let mut cmd = Command::cargo_bin("datafusion-cli").unwrap(); + let mut settings = make_settings(); + settings.set_snapshot_suffix(snapshot_name); + let _bound = settings.bind_to_scope(); + + let mut cmd = cli(); cmd.args(args); - cmd.assert().stdout(predicate::eq(expected)); + + assert_cmd_snapshot!("cli_quick_test", cmd); +} + +#[rstest] +#[case("csv")] +#[case("tsv")] +#[case("table")] +#[case("json")] +#[case("nd-json")] +#[case("automatic")] +#[test] +fn cli_format_test<'a>(#[case] format: &'a str) { + let mut settings = make_settings(); + settings.set_snapshot_suffix(format); + let _bound = settings.bind_to_scope(); + + let mut cmd = cli(); + cmd.args(["--command", "select 1", "-q", "--format", format]); + + assert_cmd_snapshot!("cli_format_test", cmd); +} + +#[test] +fn test_storage_integration() { + if env::var("TEST_STORAGE_INTEGRATION").is_err() { + eprintln!("Skipping external storages integration tests"); + return; + } + + let mut settings = make_settings(); + settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); + settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); + let _bound = settings.bind_to_scope(); + + glob!("sql/*.sql", |path| { + let input = fs::read_to_string(path).unwrap(); + assert_cmd_snapshot!("test_storage_integration", cli().pass_stdin(input)) + }); } diff --git a/datafusion-cli/tests/snaphots.rs b/datafusion-cli/tests/snaphots.rs deleted file mode 100644 index 0bc9de36ad28..000000000000 --- a/datafusion-cli/tests/snaphots.rs +++ /dev/null @@ -1,29 +0,0 @@ -use insta::glob; -use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; -use std::process::Command; -use std::{env, fs}; - -fn cli() -> Command { - Command::new(get_cargo_bin("datafusion-cli")) -} - -#[test] -fn test_snapshots() { - if env::var("TEST_INTEGRATION").is_err() { - eprintln!("Skipping integration tests"); - return; - } - - let mut settings = insta::Settings::clone_current(); - settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); - settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); - - let _bound = settings.bind_to_scope(); - - glob!("sql/*.sql", |path| { - let input = fs::read_to_string(path).unwrap(); - let file = path.file_stem().unwrap().to_string_lossy(); - - assert_cmd_snapshot!(file.as_ref(), cli().pass_stdin(input)) - }); -} diff --git a/datafusion-cli/tests/snapshots/cli_format_test@automatic.snap b/datafusion-cli/tests/snapshots/cli_format_test@automatic.snap new file mode 100644 index 000000000000..2591f493e90a --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format_test@automatic.snap @@ -0,0 +1,21 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - automatic +--- +success: true +exit_code: 0 +----- stdout ----- ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format_test@csv.snap b/datafusion-cli/tests/snapshots/cli_format_test@csv.snap new file mode 100644 index 000000000000..c41b042298eb --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format_test@csv.snap @@ -0,0 +1,18 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - csv +--- +success: true +exit_code: 0 +----- stdout ----- +Int64(1) +1 + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format_test@json.snap b/datafusion-cli/tests/snapshots/cli_format_test@json.snap new file mode 100644 index 000000000000..8f804a337cce --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format_test@json.snap @@ -0,0 +1,17 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - json +--- +success: true +exit_code: 0 +----- stdout ----- +[{"Int64(1)":1}] + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format_test@nd-json.snap b/datafusion-cli/tests/snapshots/cli_format_test@nd-json.snap new file mode 100644 index 000000000000..7b4ce1e2530c --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format_test@nd-json.snap @@ -0,0 +1,17 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - nd-json +--- +success: true +exit_code: 0 +----- stdout ----- +{"Int64(1)":1} + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format_test@table.snap b/datafusion-cli/tests/snapshots/cli_format_test@table.snap new file mode 100644 index 000000000000..99914182462a --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format_test@table.snap @@ -0,0 +1,21 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - table +--- +success: true +exit_code: 0 +----- stdout ----- ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_format_test@tsv.snap b/datafusion-cli/tests/snapshots/cli_format_test@tsv.snap new file mode 100644 index 000000000000..968268c31dd5 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_format_test@tsv.snap @@ -0,0 +1,18 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1 + - "-q" + - "--format" + - tsv +--- +success: true +exit_code: 0 +----- stdout ----- +Int64(1) +1 + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap new file mode 100644 index 000000000000..c27d527df0b6 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test@batch_size.snap @@ -0,0 +1,21 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - show datafusion.execution.batch_size + - "-q" + - "-b" + - "1" +--- +success: true +exit_code: 0 +----- stdout ----- ++---------------------------------+-------+ +| name | value | ++---------------------------------+-------+ +| datafusion.execution.batch_size | 1 | ++---------------------------------+-------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@files.snap b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap new file mode 100644 index 000000000000..7c44e41729a1 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test@files.snap @@ -0,0 +1,19 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--file" + - tests/sql/select.sql + - "-q" +--- +success: true +exit_code: 0 +----- stdout ----- ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap new file mode 100644 index 000000000000..3b975bb6a927 --- /dev/null +++ b/datafusion-cli/tests/snapshots/cli_quick_test@statements.snap @@ -0,0 +1,24 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: + - "--command" + - select 1; select 2; + - "-q" +--- +success: true +exit_code: 0 +----- stdout ----- ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ ++----------+ +| Int64(2) | ++----------+ +| 2 | ++----------+ + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/snaphots__load_local_csv@load_local_csv.sql.snap b/datafusion-cli/tests/snapshots/test_storage_integration@load_local_csv.sql.snap similarity index 78% rename from datafusion-cli/tests/snapshots/snaphots__load_local_csv@load_local_csv.sql.snap rename to datafusion-cli/tests/snapshots/test_storage_integration@load_local_csv.sql.snap index 0a6c4c022d85..029d5f8d5b9f 100644 --- a/datafusion-cli/tests/snapshots/snaphots__load_local_csv@load_local_csv.sql.snap +++ b/datafusion-cli/tests/snapshots/test_storage_integration@load_local_csv.sql.snap @@ -1,9 +1,9 @@ --- -source: tests/snaphots.rs +source: tests/cli_integration.rs info: program: datafusion-cli args: [] - stdin: "CREATE EXTERNAL TABLE cars\nSTORED AS CSV\nLOCATION '../datafusion/core/tests/data/cars.csv'\nOPTIONS ('has_header' 'TRUE');\n\nselect * from cars limit 1;" + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION '../datafusion/core/tests/data/cars.csv'\nOPTIONS ('has_header' 'TRUE');\n\nSELECT * FROM CARS limit 1;" input_file: tests/sql/load_local_csv.sql --- success: true diff --git a/datafusion-cli/tests/snapshots/snaphots__load_s3_csv@load_s3_csv.sql.snap b/datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap similarity index 80% rename from datafusion-cli/tests/snapshots/snaphots__load_s3_csv@load_s3_csv.sql.snap rename to datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap index a60d958d77bf..3a293290b9ee 100644 --- a/datafusion-cli/tests/snapshots/snaphots__load_s3_csv@load_s3_csv.sql.snap +++ b/datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap @@ -1,9 +1,9 @@ --- -source: tests/snaphots.rs +source: tests/cli_integration.rs info: program: datafusion-cli args: [] - stdin: "CREATE EXTERNAL TABLE cars\nSTORED AS CSV\nOPTIONS(\n 'aws.access_key_id' 'test',\n 'aws.secret_access_key' 'test',\n 'aws.region' 'us-east-1',\n 'aws.endpoint' 'http://localhost:4566',\n 'aws.allow_http' 'true'\n)\nLOCATION 's3://test-bucket/cars.csv';\n\nselect * from cars limit 1;" + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nOPTIONS(\n 'aws.access_key_id' 'test',\n 'aws.secret_access_key' 'test',\n 'aws.region' 'us-east-1',\n 'aws.endpoint' 'http://localhost:4566',\n 'aws.allow_http' 'true'\n)\nLOCATION 's3://test-bucket/cars.csv';\n\nSELECT * FROM CARS limit 1;" input_file: tests/sql/load_s3_csv.sql --- success: true diff --git a/datafusion-cli/tests/snapshots/test_storage_integration@select.sql.snap b/datafusion-cli/tests/snapshots/test_storage_integration@select.sql.snap new file mode 100644 index 000000000000..c137d9fe2b13 --- /dev/null +++ b/datafusion-cli/tests/snapshots/test_storage_integration@select.sql.snap @@ -0,0 +1,23 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: [] + stdin: select 1; +input_file: tests/sql/select.sql +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] ++----------+ +| Int64(1) | ++----------+ +| 1 | ++----------+ +1 row(s) fetched. +[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/sql/load_local_csv.sql b/datafusion-cli/tests/sql/load_local_csv.sql index 00147a5881c8..8920c48c5f5f 100644 --- a/datafusion-cli/tests/sql/load_local_csv.sql +++ b/datafusion-cli/tests/sql/load_local_csv.sql @@ -1,6 +1,6 @@ -CREATE EXTERNAL TABLE cars +CREATE EXTERNAL TABLE CARS STORED AS CSV LOCATION '../datafusion/core/tests/data/cars.csv' OPTIONS ('has_header' 'TRUE'); -select * from cars limit 1; \ No newline at end of file +SELECT * FROM CARS limit 1; \ No newline at end of file diff --git a/datafusion-cli/tests/sql/load_s3_csv.sql b/datafusion-cli/tests/sql/load_s3_csv.sql index 57c51ad71b4f..1d27e5a30cfe 100644 --- a/datafusion-cli/tests/sql/load_s3_csv.sql +++ b/datafusion-cli/tests/sql/load_s3_csv.sql @@ -1,4 +1,4 @@ -CREATE EXTERNAL TABLE cars +CREATE EXTERNAL TABLE CARS STORED AS CSV OPTIONS( 'aws.access_key_id' 'test', @@ -9,4 +9,4 @@ OPTIONS( ) LOCATION 's3://test-bucket/cars.csv'; -select * from cars limit 1; \ No newline at end of file +SELECT * FROM CARS limit 1; \ No newline at end of file diff --git a/datafusion-cli/tests/data/sql.txt b/datafusion-cli/tests/sql/select.sql similarity index 100% rename from datafusion-cli/tests/data/sql.txt rename to datafusion-cli/tests/sql/select.sql From 0574ab8b449f454f54ae1c3c39bc0a63377ecd5c Mon Sep 17 00:00:00 2001 From: blaginin Date: Mon, 9 Dec 2024 15:29:13 +0000 Subject: [PATCH 07/22] Unify transform and parse --- datafusion/common/src/config.rs | 40 +++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index fa073c939f6d..78323e7da803 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -19,6 +19,7 @@ use std::any::Any; use std::collections::{BTreeMap, HashMap}; +use std::error::Error; use std::fmt::{self, Display}; use std::str::FromStr; @@ -978,29 +979,34 @@ impl ConfigField for Option { } } +fn parse(input: &str) -> Result +where + T: FromStr, + T::Err: Display, + ::Err: Sync + Send + Error + 'static, +{ + input.parse().map_err(|e| { + DataFusionError::Context( + format!("Error parsing '{}' as {}", input, stringify!(T),), + Box::new(DataFusionError::External(Box::new(e))), + ) + }) +} + #[macro_export] macro_rules! config_field { - ($t:ty $(, $transform:expr)?) => { + ($t:ty) => { + config_field!($t, value => parse(value)?); + }; + + ($t:ty, $arg:ident => $transform:expr) => { impl ConfigField for $t { fn visit(&self, v: &mut V, key: &str, description: &'static str) { v.some(key, self, description) } - fn set(&mut self, _: &str, value: &str) -> Result<()> { - $( - let value = $transform(&value); - )? - - *self = value.parse().map_err(|e| { - DataFusionError::Context( - format!( - "Error parsing '{}' as {}", - value, - stringify!($t), - ), - Box::new(DataFusionError::External(Box::new(e))), - ) - })?; + fn set(&mut self, _: &str, $arg: &str) -> Result<()> { + *self = $transform; Ok(()) } } @@ -1008,7 +1014,7 @@ macro_rules! config_field { } config_field!(String); -config_field!(bool, str::to_lowercase); +config_field!(bool, value => parse(value.to_lowercase().as_str())?); config_field!(usize); config_field!(f64); config_field!(u64); From 06c013d7113d8618b12e490fb45533f8e6fdf19e Mon Sep 17 00:00:00 2001 From: blaginin Date: Mon, 9 Dec 2024 15:56:08 +0000 Subject: [PATCH 08/22] Fix tests --- datafusion/common/src/config.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index 78323e7da803..289057053785 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -987,7 +987,11 @@ where { input.parse().map_err(|e| { DataFusionError::Context( - format!("Error parsing '{}' as {}", input, stringify!(T),), + format!( + "Error parsing '{}' as {}", + input, + std::any::type_name::() + ), Box::new(DataFusionError::External(Box::new(e))), ) }) From 65809a79081123b22e3562ff82b2a35e60e583f8 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 10 Dec 2024 13:43:32 +0000 Subject: [PATCH 09/22] Setup CLI --- .github/workflows/rust.yml | 26 +++++++++++++------ datafusion-cli/tests/integration_setup.bash | 12 ++++----- ...t_storage_integration@load_s3_csv.sql.snap | 2 +- datafusion-cli/tests/sql/load_s3_csv.sql | 9 +++---- 4 files changed, 29 insertions(+), 20 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f4e3d2fbf4d6..ef36bec733e4 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -165,20 +165,30 @@ jobs: name: cargo test datafusion-cli (amd64) needs: [ linux-build-lib ] runs-on: ubuntu-latest - container: - image: amd64/rust + env: + AWS_ENDPOINT: http://localhost:9000 + AWS_ACCESS_KEY_ID: DataFusionLogin + AWS_SECRET_ACCESS_KEY: DataFusionPassword steps: - uses: actions/checkout@v4 with: submodules: true - name: Setup Rust toolchain - uses: ./.github/actions/setup-builder - with: - rust-version: stable - - name: Run tests (excluding doctests) run: | - cd datafusion-cli - cargo test --lib --tests --bins --all-features + rustup toolchain install stable + rustup default stable + - name: Setup S3-compatible storage + working-directory: datafusion-cli + run: | + echo "MINIO_CONTAINER=$(docker run -d -p 9000:9000 -e MINIO_ROOT_USER=DataFusionLogin -e MINIO_ROOT_PASSWORD=DataFusionPassword quay.io/minio/minio server /data)" >> $GITHUB_ENV + aws s3 mb s3://datafusion --endpoint-url=$AWS_ENDPOINT + aws s3 cp ../datafusion/core/tests/data/cars.csv s3://datafusion/cars.csv --endpoint-url=$AWS_ENDPOINT + + - name: Run tests (excluding doctests, but with integration tests) + working-directory: datafusion-cli + env: + TEST_STORAGE_INTEGRATION: 1 + run: cargo test --lib --tests --bins --all-features - name: Verify Working Directory Clean run: git diff --exit-code diff --git a/datafusion-cli/tests/integration_setup.bash b/datafusion-cli/tests/integration_setup.bash index e1dc9eb0e86d..40ccec88c9e5 100644 --- a/datafusion-cli/tests/integration_setup.bash +++ b/datafusion-cli/tests/integration_setup.bash @@ -5,12 +5,12 @@ export TEST_INTEGRATION=1 export AWS_DEFAULT_REGION=us-east-1 -export AWS_ACCESS_KEY_ID=test -export AWS_SECRET_ACCESS_KEY=test -export AWS_ENDPOINT=http://localhost:4566 +export AWS_ACCESS_KEY_ID=DataFusionLogin +export AWS_SECRET_ACCESS_KEY=DataFusionPassword +export AWS_ENDPOINT=http://localhost:9000 export AWS_ALLOW_HTTP=true -export AWS_BUCKET_NAME=test-bucket +export AWS_BUCKET_NAME=datafusion -aws s3 mb s3://test-bucket --endpoint-url=$AWS_ENDPOINT -aws s3 cp ../datafusion/core/tests/data/cars.csv s3://test-bucket/cars.csv --endpoint-url=$AWS_ENDPOINT +aws s3 mb s3://datafusion --endpoint-url=$AWS_ENDPOINT +aws s3 cp ../datafusion/core/tests/data/cars.csv s3://datafusion/cars.csv --endpoint-url=$AWS_ENDPOINT diff --git a/datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap b/datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap index 3a293290b9ee..2dc648d3820f 100644 --- a/datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap +++ b/datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap @@ -3,7 +3,7 @@ source: tests/cli_integration.rs info: program: datafusion-cli args: [] - stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nOPTIONS(\n 'aws.access_key_id' 'test',\n 'aws.secret_access_key' 'test',\n 'aws.region' 'us-east-1',\n 'aws.endpoint' 'http://localhost:4566',\n 'aws.allow_http' 'true'\n)\nLOCATION 's3://test-bucket/cars.csv';\n\nSELECT * FROM CARS limit 1;" + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nOPTIONS(\n 'aws.access_key_id' 'DataFusionLogin',\n 'aws.secret_access_key' 'DataFusionPassword',\n 'aws.endpoint' 'http://localhost:9000',\n 'aws.allow_http' 'true'\n)\nLOCATION 's3://datafusion/cars.csv';\n\nSELECT * FROM CARS limit 1;" input_file: tests/sql/load_s3_csv.sql --- success: true diff --git a/datafusion-cli/tests/sql/load_s3_csv.sql b/datafusion-cli/tests/sql/load_s3_csv.sql index 1d27e5a30cfe..a087ee7c7fdf 100644 --- a/datafusion-cli/tests/sql/load_s3_csv.sql +++ b/datafusion-cli/tests/sql/load_s3_csv.sql @@ -1,12 +1,11 @@ CREATE EXTERNAL TABLE CARS STORED AS CSV OPTIONS( - 'aws.access_key_id' 'test', - 'aws.secret_access_key' 'test', - 'aws.region' 'us-east-1', - 'aws.endpoint' 'http://localhost:4566', + 'aws.access_key_id' 'DataFusionLogin', + 'aws.secret_access_key' 'DataFusionPassword', + 'aws.endpoint' 'http://localhost:9000', 'aws.allow_http' 'true' ) -LOCATION 's3://test-bucket/cars.csv'; +LOCATION 's3://datafusion/cars.csv'; SELECT * FROM CARS limit 1; \ No newline at end of file From 05a562f110bd56c41cf3564bd45c5170b1a85dbf Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 10 Dec 2024 18:05:12 +0000 Subject: [PATCH 10/22] Show minio output --- .github/workflows/rust.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index ef36bec733e4..cf30f60a8707 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -177,7 +177,7 @@ jobs: run: | rustup toolchain install stable rustup default stable - - name: Setup S3-compatible storage + - name: Setup Minio - S3-compatible storage working-directory: datafusion-cli run: | echo "MINIO_CONTAINER=$(docker run -d -p 9000:9000 -e MINIO_ROOT_USER=DataFusionLogin -e MINIO_ROOT_PASSWORD=DataFusionPassword quay.io/minio/minio server /data)" >> $GITHUB_ENV @@ -191,6 +191,9 @@ jobs: run: cargo test --lib --tests --bins --all-features - name: Verify Working Directory Clean run: git diff --exit-code + - name: Minio Output + if: ${{ !cancelled() }} + run: docker logs $MINIO_CONTAINER linux-test-example: name: cargo examples (amd64) From 36f85500db4d33e3b942543415a12aec66a5b384 Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 10 Dec 2024 18:11:20 +0000 Subject: [PATCH 11/22] Format Cargo.toml --- datafusion-cli/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index f6851562eb2d..ce05b5549b67 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -65,7 +65,7 @@ url = "2.2" [dev-dependencies] assert_cmd = "2.0" ctor = "0.2.0" -predicates = "3.0" -rstest = "0.22" insta = { version = "1.41.1", features = ["glob", "filters"] } insta-cmd = "0.6.0" +predicates = "3.0" +rstest = "0.22" From 921f22939aa9a9ef34ff129b135450bbc07258f1 Mon Sep 17 00:00:00 2001 From: blaginin Date: Wed, 11 Dec 2024 20:52:43 +0000 Subject: [PATCH 12/22] Do not hardcode AWS params --- .github/workflows/rust.yml | 12 +- datafusion-cli/CONTRIBUTING.md | 70 +++++ datafusion-cli/Cargo.lock | 284 ++++++++++++++++++ datafusion-cli/Cargo.toml | 1 + datafusion-cli/tests/cli_integration.rs | 81 ++++- ...v.sql.snap => cli@load_local_csv.sql.snap} | 0 ..._csv.sql.snap => cli@load_s3_csv.sql.snap} | 2 +- ...on@select.sql.snap => cli@select.sql.snap} | 0 ...tomatic.snap => cli_format@automatic.snap} | 0 ...rmat_test@csv.snap => cli_format@csv.snap} | 0 ...at_test@json.snap => cli_format@json.snap} | 0 ...t@nd-json.snap => cli_format@nd-json.snap} | 0 ..._test@table.snap => cli_format@table.snap} | 0 ...rmat_test@tsv.snap => cli_format@tsv.snap} | 0 datafusion-cli/tests/sql/load_s3_csv.sql | 6 - 15 files changed, 437 insertions(+), 19 deletions(-) create mode 100644 datafusion-cli/CONTRIBUTING.md rename datafusion-cli/tests/snapshots/{test_storage_integration@load_local_csv.sql.snap => cli@load_local_csv.sql.snap} (100%) rename datafusion-cli/tests/snapshots/{test_storage_integration@load_s3_csv.sql.snap => cli@load_s3_csv.sql.snap} (59%) rename datafusion-cli/tests/snapshots/{test_storage_integration@select.sql.snap => cli@select.sql.snap} (100%) rename datafusion-cli/tests/snapshots/{cli_format_test@automatic.snap => cli_format@automatic.snap} (100%) rename datafusion-cli/tests/snapshots/{cli_format_test@csv.snap => cli_format@csv.snap} (100%) rename datafusion-cli/tests/snapshots/{cli_format_test@json.snap => cli_format@json.snap} (100%) rename datafusion-cli/tests/snapshots/{cli_format_test@nd-json.snap => cli_format@nd-json.snap} (100%) rename datafusion-cli/tests/snapshots/{cli_format_test@table.snap => cli_format@table.snap} (100%) rename datafusion-cli/tests/snapshots/{cli_format_test@tsv.snap => cli_format@tsv.snap} (100%) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index cf30f60a8707..318d07a1ba4f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -167,8 +167,9 @@ jobs: runs-on: ubuntu-latest env: AWS_ENDPOINT: http://localhost:9000 - AWS_ACCESS_KEY_ID: DataFusionLogin - AWS_SECRET_ACCESS_KEY: DataFusionPassword + AWS_ACCESS_KEY_ID: TEST-DataFusionLogin + AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword + steps: - uses: actions/checkout@v4 with: @@ -179,11 +180,8 @@ jobs: rustup default stable - name: Setup Minio - S3-compatible storage working-directory: datafusion-cli - run: | - echo "MINIO_CONTAINER=$(docker run -d -p 9000:9000 -e MINIO_ROOT_USER=DataFusionLogin -e MINIO_ROOT_PASSWORD=DataFusionPassword quay.io/minio/minio server /data)" >> $GITHUB_ENV - aws s3 mb s3://datafusion --endpoint-url=$AWS_ENDPOINT - aws s3 cp ../datafusion/core/tests/data/cars.csv s3://datafusion/cars.csv --endpoint-url=$AWS_ENDPOINT - + run: + echo "MINIO_CONTAINER=$(docker run -d -p 9000:9000 -e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword quay.io/minio/minio server /data)" >> $GITHUB_ENV - name: Run tests (excluding doctests, but with integration tests) working-directory: datafusion-cli env: diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md new file mode 100644 index 000000000000..b3183abdc92e --- /dev/null +++ b/datafusion-cli/CONTRIBUTING.md @@ -0,0 +1,70 @@ + + +# Development instructions + +## Running Tests + +Tests can be run using `cargo` + +```shell +cargo test +``` + +## Running Storage Integration Tests + +By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and +then provide the necessary configuration for that object store. + +### AWS + +To test the S3 integration against [Minio](https://github.com/minio/minio) + +First start up a container with Minio + +``` +$ LOCALSTACK_VERSION=sha256:a0b79cb2430f1818de2c66ce89d41bba40f5a1823410f5a7eaf3494b692eed97 +$ podman run -d -p 4566:4566 localstack/localstack@$LOCALSTACK_VERSION +$ podman run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2 +``` + +Setup environment + +``` +export TEST_INTEGRATION=1 +export AWS_DEFAULT_REGION=us-east-1 +export AWS_ACCESS_KEY_ID=test +export AWS_SECRET_ACCESS_KEY=test +export AWS_ENDPOINT=http://localhost:4566 +export AWS_ALLOW_HTTP=true +export AWS_BUCKET_NAME=test-bucket +``` + +Create a bucket using the AWS CLI + +``` +podman run --net=host --env-host amazon/aws-cli --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket +``` + +Run tests + +``` +$ cargo test --features aws +``` + diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index cb657b316ecd..dbebdd05c01e 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -505,6 +505,7 @@ dependencies = [ "aws-credential-types", "aws-sigv4", "aws-smithy-async", + "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -521,6 +522,40 @@ dependencies = [ "uuid", ] +[[package]] +name = "aws-sdk-s3" +version = "1.65.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3ba2c5c0f2618937ce3d4a5ad574b86775576fa24006bcb3128c6e2cbf3c34e" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json 0.61.1", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http-body 0.4.6", + "lru", + "once_cell", + "percent-encoding", + "regex-lite", + "sha2", + "tracing", + "url", +] + [[package]] name = "aws-sdk-sso" version = "1.50.0" @@ -595,20 +630,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" dependencies = [ "aws-credential-types", + "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", + "crypto-bigint 0.5.5", "form_urlencoded", "hex", "hmac", "http 0.2.12", "http 1.2.0", "once_cell", + "p256", "percent-encoding", + "ring", "sha2", + "subtle", "time", "tracing", + "zeroize", ] [[package]] @@ -622,12 +663,45 @@ dependencies = [ "tokio", ] +[[package]] +name = "aws-smithy-checksums" +version = "0.60.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1a71073fca26775c8b5189175ea8863afb1c9ea2cceb02a5de5ad9dfbaa795" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc32c", + "crc32fast", + "hex", + "http 0.2.12", + "http-body 0.4.6", + "md-5", + "pin-project-lite", + "sha1", + "sha2", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cef7d0a272725f87e51ba2bf89f8c21e4df61b9e49ae1ac367a6d69916ef7c90" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + [[package]] name = "aws-smithy-http" version = "0.60.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" dependencies = [ + "aws-smithy-eventstream", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", @@ -778,6 +852,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + [[package]] name = "base64" version = "0.21.7" @@ -800,6 +880,12 @@ dependencies = [ "vsimd", ] +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + [[package]] name = "bitflags" version = "1.3.2" @@ -1065,6 +1151,12 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "const-random" version = "0.1.18" @@ -1135,6 +1227,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -1156,6 +1257,28 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -1291,6 +1414,7 @@ dependencies = [ "async-trait", "aws-config", "aws-credential-types", + "aws-sdk-s3", "aws-sdk-sso", "aws-sdk-ssooidc", "aws-sdk-sts", @@ -1652,6 +1776,16 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = [ + "const-oid", + "zeroize", +] + [[package]] name = "deranged" version = "0.3.11" @@ -1716,12 +1850,44 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der", + "elliptic-curve", + "rfc6979", + "signature", +] + [[package]] name = "either" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der", + "digest", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core", + "sec1", + "subtle", + "zeroize", +] + [[package]] name = "encode_unicode" version = "0.3.6" @@ -1796,6 +1962,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core", + "subtle", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -1837,6 +2013,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1989,6 +2171,17 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core", + "subtle", +] + [[package]] name = "h2" version = "0.3.26" @@ -2053,6 +2246,11 @@ name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "heck" @@ -2672,6 +2870,15 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.2", +] + [[package]] name = "lz4_flex" version = "0.11.3" @@ -2923,6 +3130,17 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2", +] + [[package]] name = "parking_lot" version = "0.12.3" @@ -3063,6 +3281,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.31" @@ -3384,6 +3612,17 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + [[package]] name = "ring" version = "0.17.8" @@ -3635,6 +3874,20 @@ dependencies = [ "untrusted", ] +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -3727,6 +3980,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha2" version = "0.10.8" @@ -3753,6 +4017,16 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest", + "rand_core", +] + [[package]] name = "similar" version = "2.6.0" @@ -3823,6 +4097,16 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "sqlparser" version = "0.52.0" diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index ce05b5549b67..9720af4ed9f1 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -69,3 +69,4 @@ insta = { version = "1.41.1", features = ["glob", "filters"] } insta-cmd = "0.6.0" predicates = "3.0" rstest = "0.22" +aws-sdk-s3 = { version = "1.65.0" } diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index d90156a79e5f..3c97986560df 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -19,6 +19,10 @@ use std::process::Command; use rstest::rstest; +use aws_config::Region; +use aws_credential_types::Credentials; +use aws_sdk_s3::error::SdkError; +use aws_sdk_ssooidc::config::BehaviorVersion; use insta::{glob, Settings}; use insta_cmd::{assert_cmd_snapshot, get_cargo_bin}; use std::{env, fs}; @@ -76,7 +80,7 @@ fn cli_quick_test<'a>( #[case("nd-json")] #[case("automatic")] #[test] -fn cli_format_test<'a>(#[case] format: &'a str) { +fn test_cli_format<'a>(#[case] format: &'a str) { let mut settings = make_settings(); settings.set_snapshot_suffix(format); let _bound = settings.bind_to_scope(); @@ -84,16 +88,83 @@ fn cli_format_test<'a>(#[case] format: &'a str) { let mut cmd = cli(); cmd.args(["--command", "select 1", "-q", "--format", format]); - assert_cmd_snapshot!("cli_format_test", cmd); + assert_cmd_snapshot!("cli_format", cmd); } -#[test] -fn test_storage_integration() { +async fn setup_s3_storage() -> aws_sdk_s3::Client { + let access_key_id = + env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); + let secret_access_key = + env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); + + let region = Region::new(env::var("AWS_REGION").unwrap_or("df-test".to_string())); + + let allow_non_test_credentials = env::var("ALLOW_NON_TEST_CREDENTIALS") + .map(|v| v == "1") + .unwrap_or(false); + let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); + + if allow_non_test_credentials + || !access_key_id.starts_with("TEST-") + || !secret_access_key.starts_with("TEST-") + { + panic!("Refusing with non-test credentials. Either set ALLOW_NON_TEST_CREDENTIALS=1 or add TEST- for AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY"); + } + + let creds = Credentials::new(access_key_id, secret_access_key, None, None, "test"); + let config = aws_sdk_s3::Config::builder() + .credentials_provider(creds) + .endpoint_url(endpoint_url) + .region(region) + .behavior_version(BehaviorVersion::v2024_03_28()) + .build(); + + aws_sdk_s3::Client::from_conf(config) +} + +#[tokio::test] +async fn test_cli() { if env::var("TEST_STORAGE_INTEGRATION").is_err() { eprintln!("Skipping external storages integration tests"); return; } + let aws_client = setup_s3_storage().await; + let bucket_name = "datafusion"; + + match aws_client.head_bucket().bucket(bucket_name).send().await { + Ok(_) => {} + Err(SdkError::ServiceError(err)) + if matches!( + err.err(), + aws_sdk_s3::operation::head_bucket::HeadBucketError::NotFound(_) + ) => + { + aws_client + .create_bucket() + .bucket("datafusion") + .send() + .await + .expect("Failed to create bucket"); + } + Err(e) => panic!("Failed to head bucket: {:?}", e), + } + + let file_name = "../datafusion/core/tests/data/cars.csv"; + let body = + aws_sdk_s3::primitives::ByteStream::from_path(std::path::Path::new(file_name)) + .await + .expect("Failed to read file"); + + aws_client + .put_object() + .bucket("datafusion") + .key("cars.csv") + .body(body) + .send() + .await + .expect("Failed to put object"); + let mut settings = make_settings(); settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); @@ -101,6 +172,6 @@ fn test_storage_integration() { glob!("sql/*.sql", |path| { let input = fs::read_to_string(path).unwrap(); - assert_cmd_snapshot!("test_storage_integration", cli().pass_stdin(input)) + assert_cmd_snapshot!("cli", cli().pass_stdin(input)) }); } diff --git a/datafusion-cli/tests/snapshots/test_storage_integration@load_local_csv.sql.snap b/datafusion-cli/tests/snapshots/cli@load_local_csv.sql.snap similarity index 100% rename from datafusion-cli/tests/snapshots/test_storage_integration@load_local_csv.sql.snap rename to datafusion-cli/tests/snapshots/cli@load_local_csv.sql.snap diff --git a/datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap b/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap similarity index 59% rename from datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap rename to datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap index 2dc648d3820f..237a69298b0c 100644 --- a/datafusion-cli/tests/snapshots/test_storage_integration@load_s3_csv.sql.snap +++ b/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap @@ -3,7 +3,7 @@ source: tests/cli_integration.rs info: program: datafusion-cli args: [] - stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nOPTIONS(\n 'aws.access_key_id' 'DataFusionLogin',\n 'aws.secret_access_key' 'DataFusionPassword',\n 'aws.endpoint' 'http://localhost:9000',\n 'aws.allow_http' 'true'\n)\nLOCATION 's3://datafusion/cars.csv';\n\nSELECT * FROM CARS limit 1;" + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://datafusion/cars.csv';\n\nSELECT * FROM CARS limit 1;" input_file: tests/sql/load_s3_csv.sql --- success: true diff --git a/datafusion-cli/tests/snapshots/test_storage_integration@select.sql.snap b/datafusion-cli/tests/snapshots/cli@select.sql.snap similarity index 100% rename from datafusion-cli/tests/snapshots/test_storage_integration@select.sql.snap rename to datafusion-cli/tests/snapshots/cli@select.sql.snap diff --git a/datafusion-cli/tests/snapshots/cli_format_test@automatic.snap b/datafusion-cli/tests/snapshots/cli_format@automatic.snap similarity index 100% rename from datafusion-cli/tests/snapshots/cli_format_test@automatic.snap rename to datafusion-cli/tests/snapshots/cli_format@automatic.snap diff --git a/datafusion-cli/tests/snapshots/cli_format_test@csv.snap b/datafusion-cli/tests/snapshots/cli_format@csv.snap similarity index 100% rename from datafusion-cli/tests/snapshots/cli_format_test@csv.snap rename to datafusion-cli/tests/snapshots/cli_format@csv.snap diff --git a/datafusion-cli/tests/snapshots/cli_format_test@json.snap b/datafusion-cli/tests/snapshots/cli_format@json.snap similarity index 100% rename from datafusion-cli/tests/snapshots/cli_format_test@json.snap rename to datafusion-cli/tests/snapshots/cli_format@json.snap diff --git a/datafusion-cli/tests/snapshots/cli_format_test@nd-json.snap b/datafusion-cli/tests/snapshots/cli_format@nd-json.snap similarity index 100% rename from datafusion-cli/tests/snapshots/cli_format_test@nd-json.snap rename to datafusion-cli/tests/snapshots/cli_format@nd-json.snap diff --git a/datafusion-cli/tests/snapshots/cli_format_test@table.snap b/datafusion-cli/tests/snapshots/cli_format@table.snap similarity index 100% rename from datafusion-cli/tests/snapshots/cli_format_test@table.snap rename to datafusion-cli/tests/snapshots/cli_format@table.snap diff --git a/datafusion-cli/tests/snapshots/cli_format_test@tsv.snap b/datafusion-cli/tests/snapshots/cli_format@tsv.snap similarity index 100% rename from datafusion-cli/tests/snapshots/cli_format_test@tsv.snap rename to datafusion-cli/tests/snapshots/cli_format@tsv.snap diff --git a/datafusion-cli/tests/sql/load_s3_csv.sql b/datafusion-cli/tests/sql/load_s3_csv.sql index a087ee7c7fdf..d530eb76b822 100644 --- a/datafusion-cli/tests/sql/load_s3_csv.sql +++ b/datafusion-cli/tests/sql/load_s3_csv.sql @@ -1,11 +1,5 @@ CREATE EXTERNAL TABLE CARS STORED AS CSV -OPTIONS( - 'aws.access_key_id' 'DataFusionLogin', - 'aws.secret_access_key' 'DataFusionPassword', - 'aws.endpoint' 'http://localhost:9000', - 'aws.allow_http' 'true' -) LOCATION 's3://datafusion/cars.csv'; SELECT * FROM CARS limit 1; \ No newline at end of file From 107c51542ad7db5d6129fbf4fb4152d7d159877a Mon Sep 17 00:00:00 2001 From: blaginin Date: Wed, 11 Dec 2024 21:30:44 +0000 Subject: [PATCH 13/22] Test options parsing --- datafusion-cli/tests/cli_integration.rs | 98 +++++++++++++++---- .../tests/snapshots/aws_options.snap | 25 +++++ .../tests/snapshots/cli@load_s3_csv.sql.snap | 2 +- datafusion-cli/tests/sql/load_s3_csv.sql | 2 +- 4 files changed, 107 insertions(+), 20 deletions(-) create mode 100644 datafusion-cli/tests/snapshots/aws_options.snap diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 3c97986560df..c1aae5eec12e 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -34,6 +34,8 @@ fn cli() -> Command { fn make_settings() -> Settings { let mut settings = Settings::clone_current(); settings.set_prepend_module_to_snapshot(false); + settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); + settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); settings } @@ -97,12 +99,12 @@ async fn setup_s3_storage() -> aws_sdk_s3::Client { let secret_access_key = env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); + let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); let region = Region::new(env::var("AWS_REGION").unwrap_or("df-test".to_string())); let allow_non_test_credentials = env::var("ALLOW_NON_TEST_CREDENTIALS") .map(|v| v == "1") .unwrap_or(false); - let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); if allow_non_test_credentials || !access_key_id.starts_with("TEST-") @@ -122,16 +124,7 @@ async fn setup_s3_storage() -> aws_sdk_s3::Client { aws_sdk_s3::Client::from_conf(config) } -#[tokio::test] -async fn test_cli() { - if env::var("TEST_STORAGE_INTEGRATION").is_err() { - eprintln!("Skipping external storages integration tests"); - return; - } - - let aws_client = setup_s3_storage().await; - let bucket_name = "datafusion"; - +async fn create_bucket(bucket_name: &str, aws_client: &aws_sdk_s3::Client) { match aws_client.head_bucket().bucket(bucket_name).send().await { Ok(_) => {} Err(SdkError::ServiceError(err)) @@ -149,25 +142,47 @@ async fn test_cli() { } Err(e) => panic!("Failed to head bucket: {:?}", e), } +} - let file_name = "../datafusion/core/tests/data/cars.csv"; +async fn move_file_to_bucket( + from_path: &str, + to_path: &str, + bucket_name: &str, + aws_client: &aws_sdk_s3::Client, +) { let body = - aws_sdk_s3::primitives::ByteStream::from_path(std::path::Path::new(file_name)) + aws_sdk_s3::primitives::ByteStream::from_path(std::path::Path::new(from_path)) .await .expect("Failed to read file"); aws_client .put_object() - .bucket("datafusion") - .key("cars.csv") + .bucket(bucket_name) + .key(to_path) .body(body) .send() .await .expect("Failed to put object"); +} - let mut settings = make_settings(); - settings.add_filter(r"Elapsed .* seconds\.", "[ELAPSED]"); - settings.add_filter(r"DataFusion CLI v.*", "[CLI_VERSION]"); +#[tokio::test] +async fn test_cli() { + if env::var("TEST_STORAGE_INTEGRATION").is_err() { + eprintln!("Skipping external storages integration tests"); + return; + } + + let aws_client = setup_s3_storage().await; + create_bucket("datafusion", &aws_client).await; + move_file_to_bucket( + "../datafusion/core/tests/data/cars.csv", + "test_cli/cars.csv", + "datafusion", + &aws_client, + ) + .await; + + let settings = make_settings(); let _bound = settings.bind_to_scope(); glob!("sql/*.sql", |path| { @@ -175,3 +190,50 @@ async fn test_cli() { assert_cmd_snapshot!("cli", cli().pass_stdin(input)) }); } + +#[tokio::test] +async fn test_aws_options() { + // Separate test is needed to pass aws as options in sql and not via env + + if env::var("TEST_STORAGE_INTEGRATION").is_err() { + eprintln!("Skipping external storages integration tests"); + return; + } + + let aws_client = setup_s3_storage().await; + create_bucket("datafusion", &aws_client).await; + move_file_to_bucket( + "../datafusion/core/tests/data/cars.csv", + "test_aws_options/cars.csv", + "datafusion", + &aws_client, + ) + .await; + + let settings = make_settings(); + let _bound = settings.bind_to_scope(); + + let access_key_id = + env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); + let secret_access_key = + env::var("AWS_SECRET_ACCESS_KEY").expect("AWS_SECRET_ACCESS_KEY is not set"); + let endpoint_url = env::var("AWS_ENDPOINT").expect("AWS_ENDPOINT is not set"); + + let input = format!( + r#"CREATE EXTERNAL TABLE CARS +STORED AS CSV +LOCATION 's3://datafusion/test_aws_options/cars.csv' +OPTIONS( + 'aws.access_key_id' '{}', + 'aws.secret_access_key' '{}', + 'aws.endpoint' '{}', + 'aws.allow_http' 'true' +); + +SELECT * FROM CARS limit 1; +"#, + access_key_id, secret_access_key, endpoint_url + ); + + assert_cmd_snapshot!("aws_options", cli().env_clear().pass_stdin(input)); +} diff --git a/datafusion-cli/tests/snapshots/aws_options.snap b/datafusion-cli/tests/snapshots/aws_options.snap new file mode 100644 index 000000000000..270d600a1a15 --- /dev/null +++ b/datafusion-cli/tests/snapshots/aws_options.snap @@ -0,0 +1,25 @@ +--- +source: tests/cli_integration.rs +info: + program: datafusion-cli + args: [] + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://datafusion/test_aws_options/cars.csv'\nOPTIONS(\n 'aws.access_key_id' 'TEST-DataFusionLogin',\n 'aws.secret_access_key' 'TEST-DataFusionPassword',\n 'aws.endpoint' 'http://127.0.0.1:9000',\n 'aws.allow_http' 'true'\n);\n\nSELECT * FROM CARS limit 1;\n" +--- +success: true +exit_code: 0 +----- stdout ----- +[CLI_VERSION] +0 row(s) fetched. +[ELAPSED] + ++-----+-------+---------------------+ +| car | speed | time | ++-----+-------+---------------------+ +| red | 20.0 | 1996-04-12T12:05:03 | ++-----+-------+---------------------+ +1 row(s) fetched. +[ELAPSED] + +\q + +----- stderr ----- diff --git a/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap b/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap index 237a69298b0c..cb866eb3a95e 100644 --- a/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap +++ b/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap @@ -3,7 +3,7 @@ source: tests/cli_integration.rs info: program: datafusion-cli args: [] - stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://datafusion/cars.csv';\n\nSELECT * FROM CARS limit 1;" + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://datafusion/test_cli/cars.csv';\n\nSELECT * FROM CARS limit 1;" input_file: tests/sql/load_s3_csv.sql --- success: true diff --git a/datafusion-cli/tests/sql/load_s3_csv.sql b/datafusion-cli/tests/sql/load_s3_csv.sql index d530eb76b822..3aa009a8847b 100644 --- a/datafusion-cli/tests/sql/load_s3_csv.sql +++ b/datafusion-cli/tests/sql/load_s3_csv.sql @@ -1,5 +1,5 @@ CREATE EXTERNAL TABLE CARS STORED AS CSV -LOCATION 's3://datafusion/cars.csv'; +LOCATION 's3://datafusion/test_cli/cars.csv'; SELECT * FROM CARS limit 1; \ No newline at end of file From 2d430a007c897310f77f4ebecdc109119b96d7f5 Mon Sep 17 00:00:00 2001 From: blaginin Date: Wed, 11 Dec 2024 21:44:29 +0000 Subject: [PATCH 14/22] Add allow http --- .github/workflows/rust.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 318d07a1ba4f..222c5f573919 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -169,6 +169,8 @@ jobs: AWS_ENDPOINT: http://localhost:9000 AWS_ACCESS_KEY_ID: TEST-DataFusionLogin AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword + TEST_STORAGE_INTEGRATION: 1 + AWS_ALLOW_HTTP: true steps: - uses: actions/checkout@v4 @@ -184,8 +186,6 @@ jobs: echo "MINIO_CONTAINER=$(docker run -d -p 9000:9000 -e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword quay.io/minio/minio server /data)" >> $GITHUB_ENV - name: Run tests (excluding doctests, but with integration tests) working-directory: datafusion-cli - env: - TEST_STORAGE_INTEGRATION: 1 run: cargo test --lib --tests --bins --all-features - name: Verify Working Directory Clean run: git diff --exit-code From 4b5650638a9bc0e4c9aa4b2a07bcc7ccf904f8b2 Mon Sep 17 00:00:00 2001 From: blaginin Date: Thu, 12 Dec 2024 18:13:13 +0000 Subject: [PATCH 15/22] Fix aws build --- datafusion-cli/tests/cli_integration.rs | 36 +++++++++---------- .../tests/snapshots/cli@load_s3_csv.sql.snap | 2 +- datafusion-cli/tests/sql/load_s3_csv.sql | 2 +- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index c1aae5eec12e..628d03f16b63 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -93,7 +93,7 @@ fn test_cli_format<'a>(#[case] format: &'a str) { assert_cmd_snapshot!("cli_format", cmd); } -async fn setup_s3_storage() -> aws_sdk_s3::Client { +async fn s3_client() -> aws_sdk_s3::Client { let access_key_id = env::var("AWS_ACCESS_KEY_ID").expect("AWS_ACCESS_KEY_ID is not set"); let secret_access_key = @@ -124,8 +124,8 @@ async fn setup_s3_storage() -> aws_sdk_s3::Client { aws_sdk_s3::Client::from_conf(config) } -async fn create_bucket(bucket_name: &str, aws_client: &aws_sdk_s3::Client) { - match aws_client.head_bucket().bucket(bucket_name).send().await { +async fn create_bucket(bucket_name: &str, client: &aws_sdk_s3::Client) { + match client.head_bucket().bucket(bucket_name).send().await { Ok(_) => {} Err(SdkError::ServiceError(err)) if matches!( @@ -133,9 +133,9 @@ async fn create_bucket(bucket_name: &str, aws_client: &aws_sdk_s3::Client) { aws_sdk_s3::operation::head_bucket::HeadBucketError::NotFound(_) ) => { - aws_client + client .create_bucket() - .bucket("datafusion") + .bucket(bucket_name) .send() .await .expect("Failed to create bucket"); @@ -148,14 +148,14 @@ async fn move_file_to_bucket( from_path: &str, to_path: &str, bucket_name: &str, - aws_client: &aws_sdk_s3::Client, + client: &aws_sdk_s3::Client, ) { let body = aws_sdk_s3::primitives::ByteStream::from_path(std::path::Path::new(from_path)) .await .expect("Failed to read file"); - aws_client + client .put_object() .bucket(bucket_name) .key(to_path) @@ -172,13 +172,13 @@ async fn test_cli() { return; } - let aws_client = setup_s3_storage().await; - create_bucket("datafusion", &aws_client).await; + let client = s3_client().await; + create_bucket("cli", &client).await; move_file_to_bucket( "../datafusion/core/tests/data/cars.csv", - "test_cli/cars.csv", - "datafusion", - &aws_client, + "cars.csv", + "cli", + &client, ) .await; @@ -200,13 +200,13 @@ async fn test_aws_options() { return; } - let aws_client = setup_s3_storage().await; - create_bucket("datafusion", &aws_client).await; + let client = s3_client().await; + create_bucket("options", &client).await; move_file_to_bucket( "../datafusion/core/tests/data/cars.csv", - "test_aws_options/cars.csv", - "datafusion", - &aws_client, + "cars.csv", + "options", + &client, ) .await; @@ -222,7 +222,7 @@ async fn test_aws_options() { let input = format!( r#"CREATE EXTERNAL TABLE CARS STORED AS CSV -LOCATION 's3://datafusion/test_aws_options/cars.csv' +LOCATION 's3://options/cars.csv' OPTIONS( 'aws.access_key_id' '{}', 'aws.secret_access_key' '{}', diff --git a/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap b/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap index cb866eb3a95e..ebfb9ea6f42e 100644 --- a/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap +++ b/datafusion-cli/tests/snapshots/cli@load_s3_csv.sql.snap @@ -3,7 +3,7 @@ source: tests/cli_integration.rs info: program: datafusion-cli args: [] - stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://datafusion/test_cli/cars.csv';\n\nSELECT * FROM CARS limit 1;" + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://cli/cars.csv';\n\nSELECT * FROM CARS limit 1;" input_file: tests/sql/load_s3_csv.sql --- success: true diff --git a/datafusion-cli/tests/sql/load_s3_csv.sql b/datafusion-cli/tests/sql/load_s3_csv.sql index 3aa009a8847b..787fd4ba415d 100644 --- a/datafusion-cli/tests/sql/load_s3_csv.sql +++ b/datafusion-cli/tests/sql/load_s3_csv.sql @@ -1,5 +1,5 @@ CREATE EXTERNAL TABLE CARS STORED AS CSV -LOCATION 's3://datafusion/test_cli/cars.csv'; +LOCATION 's3://cli/cars.csv'; SELECT * FROM CARS limit 1; \ No newline at end of file From ad9734e39079330a8512a49e40db81fc5b6713cd Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 13 Dec 2024 09:16:47 +0000 Subject: [PATCH 16/22] Fix ip --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 222c5f573919..f83c12eea9ef 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -166,7 +166,7 @@ jobs: needs: [ linux-build-lib ] runs-on: ubuntu-latest env: - AWS_ENDPOINT: http://localhost:9000 + AWS_ENDPOINT: http://127.0.0.1:9000/ AWS_ACCESS_KEY_ID: TEST-DataFusionLogin AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword TEST_STORAGE_INTEGRATION: 1 From 0b24daab79f1e09bd1b169fb0b224da37d6e2098 Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 13 Dec 2024 12:59:37 +0000 Subject: [PATCH 17/22] =?UTF-8?q?Remove=20slash=20=E2=98=A0=EF=B8=8F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/rust.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f83c12eea9ef..c170cbb8513a 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -166,7 +166,7 @@ jobs: needs: [ linux-build-lib ] runs-on: ubuntu-latest env: - AWS_ENDPOINT: http://127.0.0.1:9000/ + AWS_ENDPOINT: http://127.0.0.1:9000 AWS_ACCESS_KEY_ID: TEST-DataFusionLogin AWS_SECRET_ACCESS_KEY: TEST-DataFusionPassword TEST_STORAGE_INTEGRATION: 1 From 5f29c002dea2af675555133b164e4fa08c051529 Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 13 Dec 2024 13:46:20 +0000 Subject: [PATCH 18/22] Format cargo toml --- datafusion-cli/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index 9720af4ed9f1..7e737d4ffc75 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -64,9 +64,9 @@ url = "2.2" [dev-dependencies] assert_cmd = "2.0" +aws-sdk-s3 = { version = "1.65.0" } ctor = "0.2.0" insta = { version = "1.41.1", features = ["glob", "filters"] } insta-cmd = "0.6.0" predicates = "3.0" rstest = "0.22" -aws-sdk-s3 = { version = "1.65.0" } From a3826d4fbec98ddfa1d3bc4ff2bbbab85621aeb7 Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 13 Dec 2024 16:00:53 +0000 Subject: [PATCH 19/22] Remove integration_setup.bash --- datafusion-cli/tests/integration_setup.bash | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 datafusion-cli/tests/integration_setup.bash diff --git a/datafusion-cli/tests/integration_setup.bash b/datafusion-cli/tests/integration_setup.bash deleted file mode 100644 index 40ccec88c9e5..000000000000 --- a/datafusion-cli/tests/integration_setup.bash +++ /dev/null @@ -1,16 +0,0 @@ -# you should have localstack up, e.g by -#$ LOCALSTACK_VERSION=sha256:a0b79cb2430f1818de2c66ce89d41bba40f5a1823410f5a7eaf3494b692eed97 -#$ podman run -d -p 4566:4566 localstack/localstack@$LOCALSTACK_VERSION -#$ podman run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2 - -export TEST_INTEGRATION=1 -export AWS_DEFAULT_REGION=us-east-1 -export AWS_ACCESS_KEY_ID=DataFusionLogin -export AWS_SECRET_ACCESS_KEY=DataFusionPassword -export AWS_ENDPOINT=http://localhost:9000 -export AWS_ALLOW_HTTP=true -export AWS_BUCKET_NAME=datafusion - - -aws s3 mb s3://datafusion --endpoint-url=$AWS_ENDPOINT -aws s3 cp ../datafusion/core/tests/data/cars.csv s3://datafusion/cars.csv --endpoint-url=$AWS_ENDPOINT From 9cb1c99e9cef22d7b8868820cde22f46f33a243d Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 13 Dec 2024 16:15:59 +0000 Subject: [PATCH 20/22] Update docs --- datafusion-cli/CONTRIBUTING.md | 36 ++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/datafusion-cli/CONTRIBUTING.md b/datafusion-cli/CONTRIBUTING.md index b3183abdc92e..bbcde0c56cd5 100644 --- a/datafusion-cli/CONTRIBUTING.md +++ b/datafusion-cli/CONTRIBUTING.md @@ -27,9 +27,20 @@ Tests can be run using `cargo` cargo test ``` +## Snapshot testing + +To test CLI output, [Insta](https://github.com/mitsuhiko/insta) is used for snapshot testing. Snapshots are generated +and compared on each test run. If the output changes, tests will fail. +To review the changes, you can use Insta CLI: + +```shell +cargo install cargo-insta +cargo insta review +``` + ## Running Storage Integration Tests -By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and +By default, storage integration tests are not run. To run them you will need to set `TEST_STORAGE_INTEGRATION=1` and then provide the necessary configuration for that object store. ### AWS @@ -39,32 +50,23 @@ To test the S3 integration against [Minio](https://github.com/minio/minio) First start up a container with Minio ``` -$ LOCALSTACK_VERSION=sha256:a0b79cb2430f1818de2c66ce89d41bba40f5a1823410f5a7eaf3494b692eed97 -$ podman run -d -p 4566:4566 localstack/localstack@$LOCALSTACK_VERSION -$ podman run -d -p 1338:1338 amazon/amazon-ec2-metadata-mock:v1.9.2 --imdsv2 +$ docker run -d -p 9000:9000 -e MINIO_ROOT_USER=TEST-DataFusionLogin -e MINIO_ROOT_PASSWORD=TEST-DataFusionPassword quay.io/minio/minio server /data ``` Setup environment ``` -export TEST_INTEGRATION=1 -export AWS_DEFAULT_REGION=us-east-1 -export AWS_ACCESS_KEY_ID=test -export AWS_SECRET_ACCESS_KEY=test -export AWS_ENDPOINT=http://localhost:4566 +export TEST_STORAGE_INTEGRATION=1 +export AWS_ACCESS_KEY_ID=TEST-DataFusionLogin +export AWS_SECRET_ACCESS_KEY=TEST-DataFusionPassword +export AWS_ENDPOINT=http://127.0.0.1:9000 export AWS_ALLOW_HTTP=true -export AWS_BUCKET_NAME=test-bucket ``` -Create a bucket using the AWS CLI - -``` -podman run --net=host --env-host amazon/aws-cli --endpoint-url=http://localhost:4566 s3 mb s3://test-bucket -``` +Note that `AWS_ENDPOINT` is set without slash at the end. Run tests ``` -$ cargo test --features aws +$ cargo test ``` - From 53c9c51a1f4349bfccdae2a603f164010c6680e0 Mon Sep 17 00:00:00 2001 From: blaginin Date: Fri, 13 Dec 2024 16:30:00 +0000 Subject: [PATCH 21/22] Do not hardcode test names --- datafusion-cli/tests/cli_integration.rs | 8 ++++---- datafusion-cli/tests/snapshots/aws_options.snap | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/datafusion-cli/tests/cli_integration.rs b/datafusion-cli/tests/cli_integration.rs index 628d03f16b63..d0a302a9aa05 100644 --- a/datafusion-cli/tests/cli_integration.rs +++ b/datafusion-cli/tests/cli_integration.rs @@ -71,7 +71,7 @@ fn cli_quick_test<'a>( let mut cmd = cli(); cmd.args(args); - assert_cmd_snapshot!("cli_quick_test", cmd); + assert_cmd_snapshot!(cmd); } #[rstest] @@ -90,7 +90,7 @@ fn test_cli_format<'a>(#[case] format: &'a str) { let mut cmd = cli(); cmd.args(["--command", "select 1", "-q", "--format", format]); - assert_cmd_snapshot!("cli_format", cmd); + assert_cmd_snapshot!(cmd); } async fn s3_client() -> aws_sdk_s3::Client { @@ -187,7 +187,7 @@ async fn test_cli() { glob!("sql/*.sql", |path| { let input = fs::read_to_string(path).unwrap(); - assert_cmd_snapshot!("cli", cli().pass_stdin(input)) + assert_cmd_snapshot!(cli().pass_stdin(input)) }); } @@ -235,5 +235,5 @@ SELECT * FROM CARS limit 1; access_key_id, secret_access_key, endpoint_url ); - assert_cmd_snapshot!("aws_options", cli().env_clear().pass_stdin(input)); + assert_cmd_snapshot!(cli().env_clear().pass_stdin(input)); } diff --git a/datafusion-cli/tests/snapshots/aws_options.snap b/datafusion-cli/tests/snapshots/aws_options.snap index 270d600a1a15..d02f4bcc510a 100644 --- a/datafusion-cli/tests/snapshots/aws_options.snap +++ b/datafusion-cli/tests/snapshots/aws_options.snap @@ -3,7 +3,7 @@ source: tests/cli_integration.rs info: program: datafusion-cli args: [] - stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://datafusion/test_aws_options/cars.csv'\nOPTIONS(\n 'aws.access_key_id' 'TEST-DataFusionLogin',\n 'aws.secret_access_key' 'TEST-DataFusionPassword',\n 'aws.endpoint' 'http://127.0.0.1:9000',\n 'aws.allow_http' 'true'\n);\n\nSELECT * FROM CARS limit 1;\n" + stdin: "CREATE EXTERNAL TABLE CARS\nSTORED AS CSV\nLOCATION 's3://options/cars.csv'\nOPTIONS(\n 'aws.access_key_id' 'TEST-DataFusionLogin',\n 'aws.secret_access_key' 'TEST-DataFusionPassword',\n 'aws.endpoint' 'http://127.0.0.1:9000',\n 'aws.allow_http' 'true'\n);\n\nSELECT * FROM CARS limit 1;\n" --- success: true exit_code: 0 From 855315f52767bc090bdc61ebc86ac10ab0ad751b Mon Sep 17 00:00:00 2001 From: blaginin Date: Tue, 17 Dec 2024 21:53:38 +0000 Subject: [PATCH 22/22] Relock cargo --- datafusion-cli/Cargo.lock | 356 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 356 insertions(+) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 9552542befd8..602d51fab250 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -506,6 +506,7 @@ dependencies = [ "aws-credential-types", "aws-sigv4", "aws-smithy-async", + "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime", "aws-smithy-runtime-api", @@ -522,6 +523,40 @@ dependencies = [ "uuid", ] +[[package]] +name = "aws-sdk-s3" +version = "1.65.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3ba2c5c0f2618937ce3d4a5ad574b86775576fa24006bcb3128c6e2cbf3c34e" +dependencies = [ + "aws-credential-types", + "aws-runtime", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", + "aws-smithy-http", + "aws-smithy-json 0.61.1", + "aws-smithy-runtime", + "aws-smithy-runtime-api", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", + "bytes", + "fastrand", + "hex", + "hmac", + "http 0.2.12", + "http-body 0.4.6", + "lru", + "once_cell", + "percent-encoding", + "regex-lite", + "sha2", + "tracing", + "url", +] + [[package]] name = "aws-sdk-sso" version = "1.50.0" @@ -596,20 +631,26 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7d3820e0c08d0737872ff3c7c1f21ebbb6693d832312d6152bf18ef50a5471c2" dependencies = [ "aws-credential-types", + "aws-smithy-eventstream", "aws-smithy-http", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", + "crypto-bigint 0.5.5", "form_urlencoded", "hex", "hmac", "http 0.2.12", "http 1.2.0", "once_cell", + "p256", "percent-encoding", + "ring", "sha2", + "subtle", "time", "tracing", + "zeroize", ] [[package]] @@ -623,12 +664,45 @@ dependencies = [ "tokio", ] +[[package]] +name = "aws-smithy-checksums" +version = "0.60.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba1a71073fca26775c8b5189175ea8863afb1c9ea2cceb02a5de5ad9dfbaa795" +dependencies = [ + "aws-smithy-http", + "aws-smithy-types", + "bytes", + "crc32c", + "crc32fast", + "hex", + "http 0.2.12", + "http-body 0.4.6", + "md-5", + "pin-project-lite", + "sha1", + "sha2", + "tracing", +] + +[[package]] +name = "aws-smithy-eventstream" +version = "0.60.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cef7d0a272725f87e51ba2bf89f8c21e4df61b9e49ae1ac367a6d69916ef7c90" +dependencies = [ + "aws-smithy-types", + "bytes", + "crc32fast", +] + [[package]] name = "aws-smithy-http" version = "0.60.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c8bc3e8fdc6b8d07d976e301c02fe553f72a39b7a9fea820e023268467d7ab6" dependencies = [ + "aws-smithy-eventstream", "aws-smithy-runtime-api", "aws-smithy-types", "bytes", @@ -779,6 +853,12 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "base16ct" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" + [[package]] name = "base64" version = "0.21.7" @@ -801,6 +881,12 @@ dependencies = [ "vsimd", ] +[[package]] +name = "base64ct" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" + [[package]] name = "bigdecimal" version = "0.4.7" @@ -1068,6 +1154,24 @@ dependencies = [ "unicode-width 0.2.0", ] +[[package]] +name = "console" +version = "0.15.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea3c6ecd8059b57859df5c69830340ed3c41d30e3da0c1cbed90a96ac853041b" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "windows-sys 0.59.0", +] + +[[package]] +name = "const-oid" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" + [[package]] name = "const-random" version = "0.1.18" @@ -1138,6 +1242,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + [[package]] name = "crc32fast" version = "1.4.2" @@ -1159,6 +1272,28 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +[[package]] +name = "crypto-bigint" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2b4b23cddf68b89b8f8069890e8c270d54e2d5fe1b143820234805e4cb17ef" +dependencies = [ + "generic-array", + "rand_core", + "subtle", + "zeroize", +] + +[[package]] +name = "crypto-bigint" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" +dependencies = [ + "rand_core", + "subtle", +] + [[package]] name = "crypto-common" version = "0.1.6" @@ -1293,6 +1428,7 @@ dependencies = [ "async-trait", "aws-config", "aws-credential-types", + "aws-sdk-s3", "aws-sdk-sso", "aws-sdk-ssooidc", "aws-sdk-sts", @@ -1304,6 +1440,8 @@ dependencies = [ "env_logger", "futures", "home", + "insta", + "insta-cmd", "mimalloc", "object_store", "parking_lot", @@ -1633,6 +1771,16 @@ dependencies = [ "sqlparser", ] +[[package]] +name = "der" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a467a65c5e759bce6e65eaf91cc29f466cdc57cb65777bd646872a8a1fd4de" +dependencies = [ + "const-oid", + "zeroize", +] + [[package]] name = "deranged" version = "0.3.11" @@ -1697,12 +1845,50 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "ecdsa" +version = "0.14.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" +dependencies = [ + "der", + "elliptic-curve", + "rfc6979", + "signature", +] + [[package]] name = "either" version = "1.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0" +[[package]] +name = "elliptic-curve" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" +dependencies = [ + "base16ct", + "crypto-bigint 0.4.9", + "der", + "digest", + "ff", + "generic-array", + "group", + "pkcs8", + "rand_core", + "sec1", + "subtle", + "zeroize", +] + +[[package]] +name = "encode_unicode" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" + [[package]] name = "endian-type" version = "0.1.2" @@ -1771,6 +1957,16 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "ff" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d013fc25338cc558c5c2cfbad646908fb23591e2404481826742b651c9af7160" +dependencies = [ + "rand_core", + "subtle", +] + [[package]] name = "fixedbitset" version = "0.4.2" @@ -1812,6 +2008,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1951,6 +2153,30 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "globset" +version = "0.4.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15f1ce686646e7f1e19bf7d5533fe443a45dbfb990e00629110797578b42fb19" +dependencies = [ + "aho-corasick", + "bstr", + "log", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "group" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" +dependencies = [ + "ff", + "rand_core", + "subtle", +] + [[package]] name = "h2" version = "0.3.26" @@ -2015,6 +2241,11 @@ name = "hashbrown" version = "0.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] [[package]] name = "heck" @@ -2389,6 +2620,33 @@ dependencies = [ "hashbrown 0.15.2", ] +[[package]] +name = "insta" +version = "1.41.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e9ffc4d4892617c50a928c52b2961cb5174b6fc6ebf252b2fac9d21955c48b8" +dependencies = [ + "console", + "globset", + "lazy_static", + "linked-hash-map", + "regex", + "serde", + "similar", + "walkdir", +] + +[[package]] +name = "insta-cmd" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffeeefa927925cced49ccb01bf3e57c9d4cd132df21e576eb9415baeab2d3de6" +dependencies = [ + "insta", + "serde", + "serde_json", +] + [[package]] name = "integer-encoding" version = "3.0.4" @@ -2567,6 +2825,12 @@ dependencies = [ "libc", ] +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "linux-raw-sys" version = "0.4.14" @@ -2595,6 +2859,15 @@ version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" +[[package]] +name = "lru" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" +dependencies = [ + "hashbrown 0.15.2", +] + [[package]] name = "lz4_flex" version = "0.11.3" @@ -2847,6 +3120,17 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "p256" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" +dependencies = [ + "ecdsa", + "elliptic-curve", + "sha2", +] + [[package]] name = "parking_lot" version = "0.12.3" @@ -2987,6 +3271,16 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "pkcs8" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9eca2c590a5f85da82668fa685c09ce2888b9430e83299debf1f34b65fd4a4ba" +dependencies = [ + "der", + "spki", +] + [[package]] name = "pkg-config" version = "0.3.31" @@ -3308,6 +3602,17 @@ dependencies = [ "windows-registry", ] +[[package]] +name = "rfc6979" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7743f17af12fa0b03b803ba12cd6a8d9483a587e89c69445e3909655c0b9fabb" +dependencies = [ + "crypto-bigint 0.4.9", + "hmac", + "zeroize", +] + [[package]] name = "ring" version = "0.17.8" @@ -3559,6 +3864,20 @@ dependencies = [ "untrusted", ] +[[package]] +name = "sec1" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" +dependencies = [ + "base16ct", + "der", + "generic-array", + "pkcs8", + "subtle", + "zeroize", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -3660,6 +3979,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha1" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "sha2" version = "0.10.8" @@ -3686,6 +4016,22 @@ dependencies = [ "libc", ] +[[package]] +name = "signature" +version = "1.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74233d3b3b2f6d4b006dc19dee745e73e2a6bfb6f93607cd3b02bd5b00797d7c" +dependencies = [ + "digest", + "rand_core", +] + +[[package]] +name = "similar" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1de1d4f81173b03af4c0cbed3c898f6bff5b870e4a7f5d6f4057d62a7a4b686e" + [[package]] name = "siphasher" version = "0.3.11" @@ -3750,6 +4096,16 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "spki" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67cf02bbac7a337dc36e4f5a693db6c21e7863f45070f7064577eb4367a3212b" +dependencies = [ + "base64ct", + "der", +] + [[package]] name = "sqlparser" version = "0.52.0"