From 441a4356b889edde0802ec1b394f1f10c18283c3 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 16 Feb 2024 09:52:46 -0700 Subject: [PATCH] Prepare 36.0.0-rc1 (#9251) --- .gitignore | 4 + Cargo.toml | 28 +-- benchmarks/Cargo.toml | 8 +- datafusion-cli/Cargo.lock | 137 ++++++++------- datafusion-cli/Cargo.toml | 4 +- datafusion/CHANGELOG.md | 1 + datafusion/core/Cargo.toml | 6 +- datafusion/optimizer/Cargo.toml | 4 +- datafusion/proto/Cargo.toml | 2 +- datafusion/sqllogictest/Cargo.toml | 2 +- dev/changelog/36.0.0.md | 264 +++++++++++++++++++++++++++++ dev/update_datafusion_versions.py | 21 ++- docs/Cargo.toml | 2 +- docs/source/user-guide/configs.md | 2 +- 14 files changed, 382 insertions(+), 103 deletions(-) create mode 100644 dev/changelog/36.0.0.md diff --git a/.gitignore b/.gitignore index 203455e4a796..05479fd0f07d 100644 --- a/.gitignore +++ b/.gitignore @@ -107,3 +107,7 @@ datafusion/sqllogictests/test_files/tpch/data/* # Scratch temp dir for sqllogictests datafusion/sqllogictest/test_files/scratch* + +# rat +filtered_rat.txt +rat.txt diff --git a/Cargo.toml b/Cargo.toml index 2e2a0103973d..31c0005d582d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -29,7 +29,7 @@ license = "Apache-2.0" readme = "README.md" repository = "https://github.com/apache/arrow-datafusion" rust-version = "1.72" -version = "35.0.0" +version = "36.0.0" [workspace.dependencies] arrow = { version = "50.0.0", features = ["prettyprint"] } @@ -46,19 +46,19 @@ bytes = "1.4" chrono = { version = "0.4.34", default-features = false } ctor = "0.2.0" dashmap = "5.4.0" -datafusion = { path = "datafusion/core", version = "35.0.0" } -datafusion-common = { path = "datafusion/common", version = "35.0.0" } -datafusion-execution = { path = "datafusion/execution", version = "35.0.0" } -datafusion-expr = { path = "datafusion/expr", version = "35.0.0" } -datafusion-functions = { path = "datafusion/functions", version = "35.0.0" } -datafusion-functions-array = { path = "datafusion/functions-array", version 
= "35.0.0" } -datafusion-optimizer = { path = "datafusion/optimizer", version = "35.0.0" } -datafusion-physical-expr = { path = "datafusion/physical-expr", version = "35.0.0" } -datafusion-physical-plan = { path = "datafusion/physical-plan", version = "35.0.0" } -datafusion-proto = { path = "datafusion/proto", version = "35.0.0" } -datafusion-sql = { path = "datafusion/sql", version = "35.0.0" } -datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "35.0.0" } -datafusion-substrait = { path = "datafusion/substrait", version = "35.0.0" } +datafusion = { path = "datafusion/core", version = "36.0.0" } +datafusion-common = { path = "datafusion/common", version = "36.0.0" } +datafusion-execution = { path = "datafusion/execution", version = "36.0.0" } +datafusion-expr = { path = "datafusion/expr", version = "36.0.0" } +datafusion-functions = { path = "datafusion/functions", version = "36.0.0" } +datafusion-functions-array = { path = "datafusion/functions-array", version = "36.0.0" } +datafusion-optimizer = { path = "datafusion/optimizer", version = "36.0.0" } +datafusion-physical-expr = { path = "datafusion/physical-expr", version = "36.0.0" } +datafusion-physical-plan = { path = "datafusion/physical-plan", version = "36.0.0" } +datafusion-proto = { path = "datafusion/proto", version = "36.0.0" } +datafusion-sql = { path = "datafusion/sql", version = "36.0.0" } +datafusion-sqllogictest = { path = "datafusion/sqllogictest", version = "36.0.0" } +datafusion-substrait = { path = "datafusion/substrait", version = "36.0.0" } doc-comment = "0.3" env_logger = "0.11" futures = "0.3" diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 3ee547410744..90ff83bd53d7 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-benchmarks" description = "DataFusion Benchmarks" -version = "35.0.0" +version = "36.0.0" edition = { workspace = true } authors = ["Apache Arrow "] homepage = 
"https://github.com/apache/arrow-datafusion" @@ -33,8 +33,8 @@ snmalloc = ["snmalloc-rs"] [dependencies] arrow = { workspace = true } -datafusion = { path = "../datafusion/core", version = "35.0.0" } -datafusion-common = { path = "../datafusion/common", version = "35.0.0" } +datafusion = { path = "../datafusion/core", version = "36.0.0" } +datafusion-common = { path = "../datafusion/common", version = "36.0.0" } env_logger = { workspace = true } futures = { workspace = true } log = { workspace = true } @@ -49,4 +49,4 @@ test-utils = { path = "../test-utils/", version = "0.1.0" } tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] } [dev-dependencies] -datafusion-proto = { path = "../datafusion/proto", version = "35.0.0" } +datafusion-proto = { path = "../datafusion/proto", version = "36.0.0" } diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 25bb30e5bc56..69456446f52b 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -25,9 +25,9 @@ checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" [[package]] name = "ahash" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +checksum = "42cd52102d3df161c77a887b608d7a4897d7cc112886a9537b738a887a03aaff" dependencies = [ "cfg-if", "const-random", @@ -270,7 +270,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.2.2", + "indexmap 2.2.3", "lexical-core", "num", "serde", @@ -384,7 +384,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -802,9 +802,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "d32a994c2b3ca201d9b263612a374263f05e7adde37c4707f693dcd375076d1f" [[package]] name = "byteorder" @@ -880,9 +880,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91d7b79e99bfaa0d47da0687c43aa3b7381938a62ad3a6498599039321f660b7" +checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" dependencies = [ "chrono", "chrono-tz-build", @@ -1023,9 +1023,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] @@ -1074,7 +1074,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30d2b3721e861707777e3195b0158f950ae6dc4a27e4d02ff9f67e3eb3de199e" dependencies = [ "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -1098,7 +1098,7 @@ dependencies = [ [[package]] name = "datafusion" -version = "35.0.0" +version = "36.0.0" dependencies = [ "ahash", "apache-avro", @@ -1126,7 +1126,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.3", - "indexmap 2.2.2", + "indexmap 2.2.3", "itertools", "log", "num-traits", @@ -1148,7 +1148,7 @@ dependencies = [ [[package]] name = "datafusion-cli" -version = "35.0.0" +version = "36.0.0" dependencies = [ "arrow", "assert_cmd", @@ -1176,7 +1176,7 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "35.0.0" +version = "36.0.0" dependencies = [ "ahash", "apache-avro", @@ -1195,7 +1195,7 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "35.0.0" +version = "36.0.0" dependencies = [ "arrow", "chrono", @@ -1214,7 +1214,7 @@ dependencies = [ [[package]] name = "datafusion-expr" 
-version = "35.0.0" +version = "36.0.0" dependencies = [ "ahash", "arrow", @@ -1228,7 +1228,7 @@ dependencies = [ [[package]] name = "datafusion-functions" -version = "35.0.0" +version = "36.0.0" dependencies = [ "arrow", "base64", @@ -1241,7 +1241,7 @@ dependencies = [ [[package]] name = "datafusion-functions-array" -version = "35.0.0" +version = "36.0.0" dependencies = [ "arrow", "datafusion-common", @@ -1253,7 +1253,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "35.0.0" +version = "36.0.0" dependencies = [ "arrow", "async-trait", @@ -1269,7 +1269,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "35.0.0" +version = "36.0.0" dependencies = [ "ahash", "arrow", @@ -1288,7 +1288,7 @@ dependencies = [ "half", "hashbrown 0.14.3", "hex", - "indexmap 2.2.2", + "indexmap 2.2.3", "itertools", "log", "md-5", @@ -1303,7 +1303,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "35.0.0" +version = "36.0.0" dependencies = [ "ahash", "arrow", @@ -1319,7 +1319,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.3", - "indexmap 2.2.2", + "indexmap 2.2.3", "itertools", "log", "once_cell", @@ -1332,7 +1332,7 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "35.0.0" +version = "36.0.0" dependencies = [ "arrow", "arrow-schema", @@ -1417,9 +1417,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "encoding_rs" @@ -1607,7 +1607,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -1691,7 +1691,7 @@ dependencies = [ 
"futures-sink", "futures-util", "http", - "indexmap 2.2.2", + "indexmap 2.2.3", "slab", "tokio", "tokio-util", @@ -1751,9 +1751,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.5" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0c62115964e08cb8039170eb33c1d0e2388a256930279edca206fff675f82c3" +checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" [[package]] name = "hex" @@ -1908,9 +1908,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.2" +version = "2.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "824b2ae422412366ba479e8111fd301f7b5faece8149317bb81925979a53f520" +checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -1954,9 +1954,9 @@ checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" [[package]] name = "jobserver" -version = "0.1.27" +version = "0.1.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +checksum = "ab46a6e9526ddef3ae7f787c06f0f2600639ba80ea3eade3d8e670a2230f51d6" dependencies = [ "libc", ] @@ -2258,19 +2258,18 @@ checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = 
"d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" dependencies = [ "autocfg", "num-integer", @@ -2291,9 +2290,9 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", "libm", @@ -2305,7 +2304,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.5", + "hermit-abi 0.3.6", "libc", ] @@ -2338,7 +2337,7 @@ dependencies = [ "rand", "reqwest", "ring 0.17.7", - "rustls-pemfile 2.0.0", + "rustls-pemfile 2.1.0", "serde", "serde_json", "snafu", @@ -2467,7 +2466,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.2.2", + "indexmap 2.2.3", ] [[package]] @@ -2525,7 +2524,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -2542,9 +2541,9 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pkg-config" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "powerfmt" @@ -2920,9 +2919,9 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "2.0.0" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"35e4980fa29e4c4b212ffb3db068a564cbf560e51d3944b7c88bd8bf5bec64f4" +checksum = "3c333bb734fcdedcea57de1602543590f545f127dc8b533324318fd492c5c70b" dependencies = [ "base64", "rustls-pki-types", @@ -2930,9 +2929,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.2.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a716eb65e3158e90e17cd93d855216e27bde02745ab842f2cab4a39dba1bacf" +checksum = "048a63e5b3ac996d78d402940b5fa47973d2d080c6c6fffa1d0f19c4445310b7" [[package]] name = "rustls-webpki" @@ -3065,7 +3064,7 @@ checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -3200,7 +3199,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -3246,7 +3245,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -3259,7 +3258,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -3281,9 +3280,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.48" +version = "2.0.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "915aea9e586f80826ee59f8453c1101f9d1c4b3964cd2460185ee8e299ada496" dependencies = [ "proc-macro2", "quote", @@ -3352,22 +3351,22 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.56" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" dependencies = [ "thiserror-impl", ] 
[[package]] name = "thiserror-impl" -version = "1.0.56" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -3462,7 +3461,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -3559,7 +3558,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -3604,7 +3603,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] @@ -3758,7 +3757,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", "wasm-bindgen-shared", ] @@ -3792,7 +3791,7 @@ checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4050,7 +4049,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.49", ] [[package]] diff --git a/datafusion-cli/Cargo.toml b/datafusion-cli/Cargo.toml index e40aa6107c7d..45e7b740bf87 100644 --- a/datafusion-cli/Cargo.toml +++ b/datafusion-cli/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "datafusion-cli" description = "Command Line Client for DataFusion query engine." 
-version = "35.0.0" +version = "36.0.0" authors = ["Apache Arrow "] edition = "2021" keywords = ["arrow", "datafusion", "query", "sql"] @@ -35,7 +35,7 @@ async-trait = "0.1.41" aws-config = "0.55" aws-credential-types = "0.55" clap = { version = "3", features = ["derive", "cargo"] } -datafusion = { path = "../datafusion/core", version = "35.0.0", features = ["avro", "crypto_expressions", "encoding_expressions", "parquet", "regex_expressions", "unicode_expressions", "compression"] } +datafusion = { path = "../datafusion/core", version = "36.0.0", features = ["avro", "crypto_expressions", "encoding_expressions", "parquet", "regex_expressions", "unicode_expressions", "compression"] } datafusion-common = { path = "../datafusion/common" } dirs = "4.0.0" env_logger = "0.9" diff --git a/datafusion/CHANGELOG.md b/datafusion/CHANGELOG.md index ae9da0e865e9..2d09782a3982 100644 --- a/datafusion/CHANGELOG.md +++ b/datafusion/CHANGELOG.md @@ -19,6 +19,7 @@ # Changelog +- [36.0.0](../dev/changelog/36.0.0.md) - [35.0.0](../dev/changelog/35.0.0.md) - [34.0.0](../dev/changelog/34.0.0.md) - [33.0.0](../dev/changelog/33.0.0.md) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 38d3012a5ac7..09718791d0f7 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -67,13 +67,13 @@ bytes = { workspace = true } bzip2 = { version = "0.4.3", optional = true } chrono = { workspace = true } dashmap = { workspace = true } -datafusion-common = { path = "../common", version = "35.0.0", features = ["object_store"], default-features = false } +datafusion-common = { path = "../common", version = "36.0.0", features = ["object_store"], default-features = false } datafusion-execution = { workspace = true } datafusion-expr = { workspace = true } datafusion-functions = { workspace = true } datafusion-functions-array = { workspace = true, optional = true } -datafusion-optimizer = { path = "../optimizer", version = "35.0.0", default-features = false } 
-datafusion-physical-expr = { path = "../physical-expr", version = "35.0.0", default-features = false } +datafusion-optimizer = { path = "../optimizer", version = "36.0.0", default-features = false } +datafusion-physical-expr = { path = "../physical-expr", version = "36.0.0", default-features = false } datafusion-physical-plan = { workspace = true } datafusion-sql = { workspace = true } flate2 = { version = "1.0.24", optional = true } diff --git a/datafusion/optimizer/Cargo.toml b/datafusion/optimizer/Cargo.toml index e4e9660f93b4..cac46eda9871 100644 --- a/datafusion/optimizer/Cargo.toml +++ b/datafusion/optimizer/Cargo.toml @@ -44,7 +44,7 @@ async-trait = { workspace = true } chrono = { workspace = true } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } -datafusion-physical-expr = { path = "../physical-expr", version = "35.0.0", default-features = false } +datafusion-physical-expr = { path = "../physical-expr", version = "36.0.0", default-features = false } hashbrown = { version = "0.14", features = ["raw"] } itertools = { workspace = true } log = { workspace = true } @@ -52,5 +52,5 @@ regex-syntax = "0.8.0" [dev-dependencies] ctor = { workspace = true } -datafusion-sql = { path = "../sql", version = "35.0.0" } +datafusion-sql = { path = "../sql", version = "36.0.0" } env_logger = "0.11.0" diff --git a/datafusion/proto/Cargo.toml b/datafusion/proto/Cargo.toml index a1032d28a2e6..59a9129c6d7a 100644 --- a/datafusion/proto/Cargo.toml +++ b/datafusion/proto/Cargo.toml @@ -44,7 +44,7 @@ parquet = ["datafusion/parquet", "datafusion-common/parquet"] [dependencies] arrow = { workspace = true } chrono = { workspace = true } -datafusion = { path = "../core", version = "35.0.0" } +datafusion = { path = "../core", version = "36.0.0" } datafusion-common = { workspace = true } datafusion-expr = { workspace = true } object_store = { workspace = true } diff --git a/datafusion/sqllogictest/Cargo.toml b/datafusion/sqllogictest/Cargo.toml index 
4f8d1b6ac403..7c54d6bf355b 100644 --- a/datafusion/sqllogictest/Cargo.toml +++ b/datafusion/sqllogictest/Cargo.toml @@ -37,7 +37,7 @@ bigdecimal = { workspace = true } bytes = { version = "1.4.0", optional = true } chrono = { workspace = true, optional = true } clap = { version = "4.4.8", features = ["derive", "env"] } -datafusion = { path = "../core", version = "35.0.0" } +datafusion = { path = "../core", version = "36.0.0" } datafusion-common = { workspace = true } futures = { version = "0.3.28" } half = { workspace = true } diff --git a/dev/changelog/36.0.0.md b/dev/changelog/36.0.0.md new file mode 100644 index 000000000000..86f6a380ceb0 --- /dev/null +++ b/dev/changelog/36.0.0.md @@ -0,0 +1,264 @@ + + +## [36.0.0](https://github.com/apache/arrow-datafusion/tree/36.0.0) (2024-02-16) + +[Full Changelog](https://github.com/apache/arrow-datafusion/compare/35.0.0...36.0.0) + +**Breaking changes:** + +- Deprecate make_scalar_function [#8878](https://github.com/apache/arrow-datafusion/pull/8878) (viirya) +- Change `Accumulator::evaluate` and `Accumulator::state` to take `&mut self` [#8925](https://github.com/apache/arrow-datafusion/pull/8925) (alamb) +- Rename `CatalogList` to `CatalogProviderList` [#9002](https://github.com/apache/arrow-datafusion/pull/9002) (comphead) +- Remove some recursive cloning from logical planning [#9050](https://github.com/apache/arrow-datafusion/pull/9050) (ozankabak) +- Support `FixedSizeList` type coercion [#8902](https://github.com/apache/arrow-datafusion/pull/8902) (Weijun-H) +- Add `ColumnarValue::values_to_arrays`, deprecate `columnar_values_to_array` [#9114](https://github.com/apache/arrow-datafusion/pull/9114) (alamb) + +**Performance related:** + +- Minor: Add new Extended ClickBench benchmark queries [#8950](https://github.com/apache/arrow-datafusion/pull/8950) (alamb) + +**Implemented enhancements:** + +- feat: support `stride` in `array_slice`, change indexes to be`1` based 
[#8829](https://github.com/apache/arrow-datafusion/pull/8829) (Weijun-H) +- feat: emitting partial join results in `HashJoinStream` [#8020](https://github.com/apache/arrow-datafusion/pull/8020) (korowa) +- feat:implement sql style 'ends_with' and 'instr' string function [#8862](https://github.com/apache/arrow-datafusion/pull/8862) (zy-kkk) +- feat: Support parquet bloom filter pruning for decimal128 [#8930](https://github.com/apache/arrow-datafusion/pull/8930) (Ted-Jiang) +- feat: Disable client console highlight by default [#9013](https://github.com/apache/arrow-datafusion/pull/9013) (comphead) +- feat: support the ergonomics of getting list slice with stride [#8946](https://github.com/apache/arrow-datafusion/pull/8946) (Weijun-H) +- feat: Parallel Arrow file format reading [#8897](https://github.com/apache/arrow-datafusion/pull/8897) (my-vegetable-has-exploded) +- feat: support array_reverse [#9023](https://github.com/apache/arrow-datafusion/pull/9023) (Weijun-H) +- feat: issue #8969 adding position function [#8988](https://github.com/apache/arrow-datafusion/pull/8988) (Lordworms) +- feat: support `LargeList` in `flatten` [#9110](https://github.com/apache/arrow-datafusion/pull/9110) (Weijun-H) +- feat: improve `make_date` performance [#9112](https://github.com/apache/arrow-datafusion/pull/9112) (r3stl355) +- feat: add github action to self-assign the issue [#9132](https://github.com/apache/arrow-datafusion/pull/9132) (r3stl355) +- feat: add ability to query the remote http(s) location directly in datafusion-cli [#9150](https://github.com/apache/arrow-datafusion/pull/9150) (r3stl355) +- feat: implement select directly from s3 and gcs locations in datafusion-cli [#9199](https://github.com/apache/arrow-datafusion/pull/9199) (r3stl355) +- feat: support block gzip for streams [#9175](https://github.com/apache/arrow-datafusion/pull/9175) (tshauck) + +**Fixed bugs:** + +- fix: recursive initialize method [#8937](https://github.com/apache/arrow-datafusion/pull/8937) 
(waynexia) +- fix: common_subexpr_eliminate rule should not apply to short-circuit expression [#8928](https://github.com/apache/arrow-datafusion/pull/8928) (haohuaijin) +- fix: issue #8922 make row group test more readable [#8941](https://github.com/apache/arrow-datafusion/pull/8941) (Lordworms) +- fix: allow placeholders to be substituted when coercible [#8977](https://github.com/apache/arrow-datafusion/pull/8977) (kallisti-dev) +- fix: unambiguously truncate time in date_trunc function [#9068](https://github.com/apache/arrow-datafusion/pull/9068) (mhilton) +- fix: schema metadata retrieval when listing parquet table [#9134](https://github.com/apache/arrow-datafusion/pull/9134) (brayanjuls) + +**Documentation updates:** + +- Prepare 35.0.0-rc1 [#8924](https://github.com/apache/arrow-datafusion/pull/8924) (andygrove) +- Update project links [#8954](https://github.com/apache/arrow-datafusion/pull/8954) (comphead) +- Document parallelism and thread scheduling in the architecture guide [#8986](https://github.com/apache/arrow-datafusion/pull/8986) (alamb) +- chore: fix license badge in README [#9008](https://github.com/apache/arrow-datafusion/pull/9008) (suyanhanx) +- docs: fix array_position docs [#9003](https://github.com/apache/arrow-datafusion/pull/9003) (tshauck) +- Docs: improve contributor guide to explain how to work with tickets [#8999](https://github.com/apache/arrow-datafusion/pull/8999) (alamb) +- Document minimum required rust version [#9071](https://github.com/apache/arrow-datafusion/pull/9071) (comphead) +- Minor: Add ParadeDB to the list of users [#9018](https://github.com/apache/arrow-datafusion/pull/9018) (alamb) +- Update minimum rust version to 1.72 [#8997](https://github.com/apache/arrow-datafusion/pull/8997) (alamb) +- docs: add docs and example showing how to get the expression data type [#9118](https://github.com/apache/arrow-datafusion/pull/9118) (r3stl355) +- chore: Fix incorrect comment in substrait consumer 
[#9123](https://github.com/apache/arrow-datafusion/pull/9123) (caicancai) +- Minor: Fix Self referential links in readme [#9119](https://github.com/apache/arrow-datafusion/pull/9119) (alamb) +- Examples link in catalogs.rs leads to a 404 [#9194](https://github.com/apache/arrow-datafusion/pull/9194) (Omega359) +- Create `datafusion-functions-array` crate and move `ArrayToString` function into it [#9113](https://github.com/apache/arrow-datafusion/pull/9113) (alamb) + +**Merged pull requests:** + +- Add hash_join_single_partition_threshold_rows config [#8720](https://github.com/apache/arrow-datafusion/pull/8720) (maruschin) +- Prepare 35.0.0-rc1 [#8924](https://github.com/apache/arrow-datafusion/pull/8924) (andygrove) +- feat: support `stride` in `array_slice`, change indexes to be`1` based [#8829](https://github.com/apache/arrow-datafusion/pull/8829) (Weijun-H) +- fix: recursive initialize method [#8937](https://github.com/apache/arrow-datafusion/pull/8937) (waynexia) +- Fix expr partial ord test [#8908](https://github.com/apache/arrow-datafusion/pull/8908) (mustafasrepo) +- Simplify windows builtin functions return type [#8920](https://github.com/apache/arrow-datafusion/pull/8920) (comphead) +- Fix handling of nested leaf columns in parallel parquet writer [#8923](https://github.com/apache/arrow-datafusion/pull/8923) (devinjdangelo) +- feat: emitting partial join results in `HashJoinStream` [#8020](https://github.com/apache/arrow-datafusion/pull/8020) (korowa) +- fix: common_subexpr_eliminate rule should not apply to short-circuit expression [#8928](https://github.com/apache/arrow-datafusion/pull/8928) (haohuaijin) +- Support GroupsAccumulator accumulator for udaf [#8892](https://github.com/apache/arrow-datafusion/pull/8892) (guojidan) +- test: Port tests in `partitioned_csv.rs` to sqllogictest [#8919](https://github.com/apache/arrow-datafusion/pull/8919) (simicd) +- [CI] Fix RUSTFLAGS [#8929](https://github.com/apache/arrow-datafusion/pull/8929) (Jefffrey) +- 
Minor: Update datafusion-cli README to explain why it is not in the w… [#8938](https://github.com/apache/arrow-datafusion/pull/8938) (alamb) +- Add syntax highlight to datafusion-cli [#8918](https://github.com/apache/arrow-datafusion/pull/8918) (trungda) +- Update substrait requirement from 0.22.1 to 0.23.0 [#8943](https://github.com/apache/arrow-datafusion/pull/8943) (dependabot[bot]) +- Deprecate make_scalar_function [#8878](https://github.com/apache/arrow-datafusion/pull/8878) (viirya) +- Update project links [#8954](https://github.com/apache/arrow-datafusion/pull/8954) (comphead) +- fix: issue #8922 make row group test more readable [#8941](https://github.com/apache/arrow-datafusion/pull/8941) (Lordworms) +- feat:implement sql style 'ends_with' and 'instr' string function [#8862](https://github.com/apache/arrow-datafusion/pull/8862) (zy-kkk) +- [MINOR]: Extract aggregate topk function to `aggregate_topk.slt` [#8948](https://github.com/apache/arrow-datafusion/pull/8948) (mustafasrepo) +- Combine multiple `IN` lists in `ExprSimplifier` [#8949](https://github.com/apache/arrow-datafusion/pull/8949) (jayzhan211) +- Fix clippy failures: error: use of deprecated function `functions::make_scalar_function [#8972](https://github.com/apache/arrow-datafusion/pull/8972) (alamb) +- feat: Support parquet bloom filter pruning for decimal128 [#8930](https://github.com/apache/arrow-datafusion/pull/8930) (Ted-Jiang) +- [MINOR]: Update create_window_expr to refer only input schema [#8945](https://github.com/apache/arrow-datafusion/pull/8945) (mustafasrepo) +- Don't error in simplify_expressions rule [#8957](https://github.com/apache/arrow-datafusion/pull/8957) (haohuaijin) +- Use .zip to avoid unwrap [#8956](https://github.com/apache/arrow-datafusion/pull/8956) (Luv-Ray) +- Change `Accumulator::evaluate` and `Accumulator::state` to take `&mut self` [#8925](https://github.com/apache/arrow-datafusion/pull/8925) (alamb) +- Enhance simplifier by adding Canonicalize 
[#8780](https://github.com/apache/arrow-datafusion/pull/8780) (yyy1000) +- Find the correct fields when using page filter on `struct` fields in parquet [#8848](https://github.com/apache/arrow-datafusion/pull/8848) (manoj-inukolunu) +- fix: allow placeholders to be substituted when coercible [#8977](https://github.com/apache/arrow-datafusion/pull/8977) (kallisti-dev) +- Minor: improve CatalogProvider documentation with rationale and info about remote catalogs [#8968](https://github.com/apache/arrow-datafusion/pull/8968) (alamb) +- Improve to_timestamp docs [#8981](https://github.com/apache/arrow-datafusion/pull/8981) (Omega359) +- Add helper function for processing scalar function input [#8962](https://github.com/apache/arrow-datafusion/pull/8962) (viirya) +- Fix optimize projections bug [#8960](https://github.com/apache/arrow-datafusion/pull/8960) (mustafasrepo) +- NOT operator not return internal error when args are not boolean value [#8982](https://github.com/apache/arrow-datafusion/pull/8982) (guojidan) +- Minor: Add new Extended ClickBench benchmark queries [#8950](https://github.com/apache/arrow-datafusion/pull/8950) (alamb) +- Minor: Add comments to MSRV CI check to help if it fails [#8995](https://github.com/apache/arrow-datafusion/pull/8995) (alamb) +- Minor: Document memory management design on `MemoryPool` [#8966](https://github.com/apache/arrow-datafusion/pull/8966) (alamb) +- Fix LEAD/LAG window functions when default value null [#8989](https://github.com/apache/arrow-datafusion/pull/8989) (comphead) +- Optimize MIN/MAX when relation is empty [#8940](https://github.com/apache/arrow-datafusion/pull/8940) (viirya) +- [task #8203] Port tests in joins.rs to sqllogictest [#8996](https://github.com/apache/arrow-datafusion/pull/8996) (Tangruilin) +- [task #8213]Port tests in select.rs to sqllogictest [#8967](https://github.com/apache/arrow-datafusion/pull/8967) (Tangruilin) +- test: Port (last) `repartition.rs` query to sqllogictest 
[#8936](https://github.com/apache/arrow-datafusion/pull/8936) (simicd) +- Update to sqlparser `0.42.0` [#9000](https://github.com/apache/arrow-datafusion/pull/9000) (alamb) +- [MINOR]: Fix Optimize Projections Bug [#8992](https://github.com/apache/arrow-datafusion/pull/8992) (mustafasrepo) +- Make Topk aggregate tests deterministic [#8998](https://github.com/apache/arrow-datafusion/pull/8998) (mustafasrepo) +- Add support for Postgres LIKE operators [#8894](https://github.com/apache/arrow-datafusion/pull/8894) (gruuya) +- bug: Datafusion doesn't respect case sensitive table references [#8964](https://github.com/apache/arrow-datafusion/pull/8964) (xhwhis) +- Document parallelism and thread scheduling in the architecture guide [#8986](https://github.com/apache/arrow-datafusion/pull/8986) (alamb) +- Fix None Projections in Projection Pushdown [#9005](https://github.com/apache/arrow-datafusion/pull/9005) (berkaysynnada) +- Lead and Lag window functions should support default value with datatype other than Int64 [#9001](https://github.com/apache/arrow-datafusion/pull/9001) (viirya) +- chore: fix license badge in README [#9008](https://github.com/apache/arrow-datafusion/pull/9008) (suyanhanx) +- Minor: fix: #9010 - Optimizer schema change assert error is incorrect [#9012](https://github.com/apache/arrow-datafusion/pull/9012) (curtisleefulton) +- docs: fix array_position docs [#9003](https://github.com/apache/arrow-datafusion/pull/9003) (tshauck) +- Rename `CatalogList` to `CatalogProviderList` [#9002](https://github.com/apache/arrow-datafusion/pull/9002) (comphead) +- Safeguard against potential inexact row count being smaller than exact null count [#9007](https://github.com/apache/arrow-datafusion/pull/9007) (gruuya) +- Recursive CTEs: Stage 3 - add execution support [#8840](https://github.com/apache/arrow-datafusion/pull/8840) (matthewgapp) +- sqllogictest: move the creation of the nan_table from Rust to slt [#9022](https://github.com/apache/arrow-datafusion/pull/9022) 
(jonahgao) +- TreeNode refactor code deduplication: Part 3 [#8817](https://github.com/apache/arrow-datafusion/pull/8817) (ozankabak) +- feat: Disable client console highlight by default [#9013](https://github.com/apache/arrow-datafusion/pull/9013) (comphead) +- [task #8917] Implement information_schema.schemata [#8993](https://github.com/apache/arrow-datafusion/pull/8993) (Tangruilin) +- Properly encode STRING_AGG, NTH_VALUE in physical plan protobufs [#9027](https://github.com/apache/arrow-datafusion/pull/9027) (scsmithr) +- [task #8201] Port tests in expr.rs to sqllogictest, finish the left c… [#9014](https://github.com/apache/arrow-datafusion/pull/9014) (Tangruilin) +- Fix the clippy error of use of deprecated method [#9034](https://github.com/apache/arrow-datafusion/pull/9034) (viirya) +- feat: support the ergonomics of getting list slice with stride [#8946](https://github.com/apache/arrow-datafusion/pull/8946) (Weijun-H) +- Cache common referred expression at the window input [#9009](https://github.com/apache/arrow-datafusion/pull/9009) (mustafasrepo) +- Optimize `COUNT( DISTINCT ...)` for strings (up to 9x faster) [#8849](https://github.com/apache/arrow-datafusion/pull/8849) (jayzhan211) +- feat: Parallel Arrow file format reading [#8897](https://github.com/apache/arrow-datafusion/pull/8897) (my-vegetable-has-exploded) +- Change remove from swap to shift in index map [#9049](https://github.com/apache/arrow-datafusion/pull/9049) (mustafasrepo) +- Relax join keys constraint from Column to any physical expression for physical join operators [#8991](https://github.com/apache/arrow-datafusion/pull/8991) (viirya) +- Minor: Improve memory helper trait documentation [#9025](https://github.com/apache/arrow-datafusion/pull/9025) (alamb) +- Docs: improve contributor guide to explain how to work with tickets [#8999](https://github.com/apache/arrow-datafusion/pull/8999) (alamb) +- fix issue where upper and lower functions only work correctly on ascii character 
[#9054](https://github.com/apache/arrow-datafusion/pull/9054) (Omega359) +- Minor: small updates to bench.sh [#9035](https://github.com/apache/arrow-datafusion/pull/9035) (kmitchener) +- Chore: explicitly list out all Expr types in TypeCoercionRewriter::mutate [#9038](https://github.com/apache/arrow-datafusion/pull/9038) (guojidan) +- Minor: improve scalar functions document [#9029](https://github.com/apache/arrow-datafusion/pull/9029) (Weijun-H) +- [MINOR] Alter a SHJ test for relaxing "on" condition [#9065](https://github.com/apache/arrow-datafusion/pull/9065) (metesynnada) +- Remove some recursive cloning from logical planning [#9050](https://github.com/apache/arrow-datafusion/pull/9050) (ozankabak) +- minor: remove useless macro [#8979](https://github.com/apache/arrow-datafusion/pull/8979) (jackwener) +- Causality Analysis for Builtin Window Functions [#9048](https://github.com/apache/arrow-datafusion/pull/9048) (mustafasrepo) +- Minor: add doc examples for RawTableAllocExt [#9059](https://github.com/apache/arrow-datafusion/pull/9059) (alamb) +- Update substrait requirement from 0.23.0 to 0.24.0 [#9067](https://github.com/apache/arrow-datafusion/pull/9067) (dependabot[bot]) +- Remove single_file_output option from FileSinkConfig and Copy statement [#9041](https://github.com/apache/arrow-datafusion/pull/9041) (yyy1000) +- Add a make_date function [#9040](https://github.com/apache/arrow-datafusion/pull/9040) (Omega359) +- Speedup `DFSchema::merge` using HashSet indices [#9020](https://github.com/apache/arrow-datafusion/pull/9020) (simonvandel) +- Document minimum required rust version [#9071](https://github.com/apache/arrow-datafusion/pull/9071) (comphead) +- Return proper number of expressions for nth_value_agg [#9044](https://github.com/apache/arrow-datafusion/pull/9044) (mustafasrepo) +- ScalarUDF with zero arguments should be provided with one null array as parameter [#9031](https://github.com/apache/arrow-datafusion/pull/9031) (viirya) +- Update strum 
requirement from 0.25.0 to 0.26.1 [#9046](https://github.com/apache/arrow-datafusion/pull/9046) (dependabot[bot]) +- Create `datafusion-functions` crate, extract encode and decode to [#8705](https://github.com/apache/arrow-datafusion/pull/8705) (alamb) +- Add documentation for streaming usecase [#9070](https://github.com/apache/arrow-datafusion/pull/9070) (mustafasrepo) +- fix: unambiguously truncate time in date_trunc function [#9068](https://github.com/apache/arrow-datafusion/pull/9068) (mhilton) +- feat: support array_reverse [#9023](https://github.com/apache/arrow-datafusion/pull/9023) (Weijun-H) +- prettier to_timestamp_invoke [#9078](https://github.com/apache/arrow-datafusion/pull/9078) (Tangruilin) +- Handle invalid types for negation [#9066](https://github.com/apache/arrow-datafusion/pull/9066) (trungda) +- Minor: reduce unwraps in datetime_expressions.rs [#9072](https://github.com/apache/arrow-datafusion/pull/9072) (alamb) +- Remove custom doubling strategy + add examples to `VecAllocEx` [#9058](https://github.com/apache/arrow-datafusion/pull/9058) (alamb) +- Split physical_plan_tpch into separate benchmarks [#9043](https://github.com/apache/arrow-datafusion/pull/9043) (simonvandel) +- Minor: Add ParadeDB to the list of users [#9018](https://github.com/apache/arrow-datafusion/pull/9018) (alamb) +- [MINOR]: Add check for unnecessary projection [#9079](https://github.com/apache/arrow-datafusion/pull/9079) (mustafasrepo) +- chore(placeholder): update error message and add tests [#9073](https://github.com/apache/arrow-datafusion/pull/9073) (appletreeisyellow) +- refer to #8781, convert the internal_err! in datetime_expression.rs to exec_err! 
[#9083](https://github.com/apache/arrow-datafusion/pull/9083) (Tangruilin) +- Add benchmarks for to_timestamp and make_date functions [#9086](https://github.com/apache/arrow-datafusion/pull/9086) (Omega359) +- chore: Clarify ParadeDB branding [#9088](https://github.com/apache/arrow-datafusion/pull/9088) (philippemnoel) +- doc: Add example how to include latest datafusion [#9076](https://github.com/apache/arrow-datafusion/pull/9076) (comphead) +- Update minimum rust version to 1.72 [#8997](https://github.com/apache/arrow-datafusion/pull/8997) (alamb) +- Fix typo in an error message [#9099](https://github.com/apache/arrow-datafusion/pull/9099) (AdamGS) +- Update InfluxDB links in Known Users section of documentation [#9092](https://github.com/apache/arrow-datafusion/pull/9092) (alamb) +- Support `FixedSizeList` type coercion [#8902](https://github.com/apache/arrow-datafusion/pull/8902) (Weijun-H) +- Improve Canonicalize API [#8983](https://github.com/apache/arrow-datafusion/pull/8983) (alamb) +- Update env_logger requirement from 0.10 to 0.11 [#8944](https://github.com/apache/arrow-datafusion/pull/8944) (dependabot[bot]) +- Split count_distinct.rs into separate modules [#9087](https://github.com/apache/arrow-datafusion/pull/9087) (alamb) +- Fix update_expr for projection pushdown [#9096](https://github.com/apache/arrow-datafusion/pull/9096) (viirya) +- Improve `InListSimplifier` -- add test, comment and avoid clones [#8971](https://github.com/apache/arrow-datafusion/pull/8971) (alamb) +- feat: issue #8969 adding position function [#8988](https://github.com/apache/arrow-datafusion/pull/8988) (Lordworms) +- Cleanup regex_expressions.rs to remove \_regexp_match function [#9107](https://github.com/apache/arrow-datafusion/pull/9107) (Omega359) +- Unnest with single expression [#9069](https://github.com/apache/arrow-datafusion/pull/9069) (jayzhan211) +- Minor: improve GroupsAccumulator and Accumulator documentation 
[#8963](https://github.com/apache/arrow-datafusion/pull/8963) (alamb) +- move InList related simplify to one place [#9037](https://github.com/apache/arrow-datafusion/pull/9037) (guojidan) +- docs: add docs and example showing how to get the expression data type [#9118](https://github.com/apache/arrow-datafusion/pull/9118) (r3stl355) +- Add http(s) support to the command line [#8753](https://github.com/apache/arrow-datafusion/pull/8753) (kcolford) +- Remove External Table Backwards Compatibility Options [#9105](https://github.com/apache/arrow-datafusion/pull/9105) (yyy1000) +- feat: support `LargeList` in `flatten` [#9110](https://github.com/apache/arrow-datafusion/pull/9110) (Weijun-H) +- feat: improve `make_date` performance [#9112](https://github.com/apache/arrow-datafusion/pull/9112) (r3stl355) +- Refactor min/max value update in Parquet statistics [#9120](https://github.com/apache/arrow-datafusion/pull/9120) (Weijun-H) +- chore: Fix incorrect comment in substrait consumer [#9123](https://github.com/apache/arrow-datafusion/pull/9123) (caicancai) +- Minor: Fix Self referential links in readme [#9119](https://github.com/apache/arrow-datafusion/pull/9119) (alamb) +- Add `ColumnarValue::values_to_arrays`, deprecate `columnar_values_to_array` [#9114](https://github.com/apache/arrow-datafusion/pull/9114) (alamb) +- Support Copy with Remote Object Stores in datafusion-cli [#9064](https://github.com/apache/arrow-datafusion/pull/9064) (manoj-inukolunu) +- Fix Dockerfile min rust version to 1.72 [#9135](https://github.com/apache/arrow-datafusion/pull/9135) (alamb) +- fix: schema metadata retrieval when listing parquet table [#9134](https://github.com/apache/arrow-datafusion/pull/9134) (brayanjuls) +- Update parse_protobuf_file_scan_config to remove any partition columns from the file_schema in FileScanConfig [#9126](https://github.com/apache/arrow-datafusion/pull/9126) (bcmcmill) +- feat: add github action to self-assign the issue 
[#9132](https://github.com/apache/arrow-datafusion/pull/9132) (r3stl355) +- Fix NULL values in FixedSizeList creation [#9141](https://github.com/apache/arrow-datafusion/pull/9141) (Weijun-H) +- Add `FunctionRegistry::register_udaf` and `FunctionRegistry::register_udwf` [#9075](https://github.com/apache/arrow-datafusion/pull/9075) (alamb) +- Change ScalarValue::Struct to ArrayRef [#7893](https://github.com/apache/arrow-datafusion/pull/7893) (jayzhan211) +- Support join filter for `SortMergeJoin` [#9080](https://github.com/apache/arrow-datafusion/pull/9080) (viirya) +- Typo in docstring [#9149](https://github.com/apache/arrow-datafusion/pull/9149) (tv42) +- RecordBatchReceiverStreamBuilder: don't stringify errors [#9155](https://github.com/apache/arrow-datafusion/pull/9155) (tv42) +- port position test to scalar [#9128](https://github.com/apache/arrow-datafusion/pull/9128) (Lordworms) +- Minor: Improve `DataFrame` docs, add examples [#9159](https://github.com/apache/arrow-datafusion/pull/9159) (alamb) +- feat: add ability to query the remote http(s) location directly in datafusion-cli [#9150](https://github.com/apache/arrow-datafusion/pull/9150) (r3stl355) +- Add `regexp_like`, improve docs and examples for `regexp_match` [#9137](https://github.com/apache/arrow-datafusion/pull/9137) (Omega359) +- Partial Sort Plan Implementation [#9125](https://github.com/apache/arrow-datafusion/pull/9125) (ahmetenis) +- Update tonic requirement from 0.10 to 0.11 [#9176](https://github.com/apache/arrow-datafusion/pull/9176) (dependabot[bot]) +- minor: fix error message function naming [#9168](https://github.com/apache/arrow-datafusion/pull/9168) (comphead) +- Minor: Update `DataFrame::write_table` docs [#9169](https://github.com/apache/arrow-datafusion/pull/9169) (alamb) +- Improve PhysicalExpr documentation [#9180](https://github.com/apache/arrow-datafusion/pull/9180) (alamb) +- Fix sphinx warnings [#9142](https://github.com/apache/arrow-datafusion/pull/9142) (ongchi) +- Use concat 
to simplify Nested Scalar creation [#9174](https://github.com/apache/arrow-datafusion/pull/9174) (jayzhan211) +- Minor: Remove unnecessary map_err [#9186](https://github.com/apache/arrow-datafusion/pull/9186) (alamb) +- Add example of using `PruningPredicate` to datafusion-examples [#9183](https://github.com/apache/arrow-datafusion/pull/9183) (alamb) +- Use prep_null_mask_filter to handle nulls in selection mask [#9163](https://github.com/apache/arrow-datafusion/pull/9163) (viirya) +- [Document] Adding UDF by impl ScalarUDFImpl [#9172](https://github.com/apache/arrow-datafusion/pull/9172) (yyy1000) +- Docs: Extend `PruningPredicate` with background and implementation info [#9184](https://github.com/apache/arrow-datafusion/pull/9184) (alamb) +- chore: make tokio a workspace dependency [#9187](https://github.com/apache/arrow-datafusion/pull/9187) (PsiACE) +- Examples link in catalogs.rs leads to a 404 [#9194](https://github.com/apache/arrow-datafusion/pull/9194) (Omega359) +- Add test pipeline for Mac aarch64 [#9191](https://github.com/apache/arrow-datafusion/pull/9191) (viirya) +- Add string aggregate grouping fuzz test, add `MemTable::with_sort_exprs` [#9190](https://github.com/apache/arrow-datafusion/pull/9190) (alamb) +- Create `datafusion-functions-array` crate and move `ArrayToString` function into it [#9113](https://github.com/apache/arrow-datafusion/pull/9113) (alamb) +- Add constant expression support to equivalence properties [#9198](https://github.com/apache/arrow-datafusion/pull/9198) (mustafasrepo) +- chore: update tpch-docker docker repository [#9204](https://github.com/apache/arrow-datafusion/pull/9204) (pmcgleenon) +- feat: implement select directly from s3 and gcs locations in datafusion-cli [#9199](https://github.com/apache/arrow-datafusion/pull/9199) (r3stl355) +- MINOR: Add "fs" feature to "tokio", fix "features" typo. 
[#9210](https://github.com/apache/arrow-datafusion/pull/9210) (mustafasrepo) +- Add `to_char` function implementation using chrono formats [#9181](https://github.com/apache/arrow-datafusion/pull/9181) (Omega359) +- Add `SessionContext::read_batches` [#9197](https://github.com/apache/arrow-datafusion/pull/9197) (Lordworms) +- feat: support block gzip for streams [#9175](https://github.com/apache/arrow-datafusion/pull/9175) (tshauck) +- chore(pruning): Support `IS NOT NULL` predicates in `PruningPredicate` [#9208](https://github.com/apache/arrow-datafusion/pull/9208) (appletreeisyellow) +- Add cargo audit CI [#9182](https://github.com/apache/arrow-datafusion/pull/9182) (ongchi) +- Move `nullif` and `isnan` to datafusion-functions [#9216](https://github.com/apache/arrow-datafusion/pull/9216) (alamb) +- Bugfix - Projection Removal Conditions [#9215](https://github.com/apache/arrow-datafusion/pull/9215) (berkaysynnada) +- Partitioning fixes [#9207](https://github.com/apache/arrow-datafusion/pull/9207) (esheppa) +- Return an error when a column does not exist in window function [#9202](https://github.com/apache/arrow-datafusion/pull/9202) (PhVHoang) +- Revert "chore(pruning): Support `IS NOT NULL` predicates in `PruningPredicate` (#9208)" [#9232](https://github.com/apache/arrow-datafusion/pull/9232) (appletreeisyellow) +- Improve documentation on how to build `ScalarValue::Struct` and add `ScalarStructBuilder` [#9229](https://github.com/apache/arrow-datafusion/pull/9229) (alamb) +- Minor: improve Display of output ordering of `StreamTableExec` [#9225](https://github.com/apache/arrow-datafusion/pull/9225) (mustafasrepo) +- Support compute return types from argument values (not just their DataTypes) [#8985](https://github.com/apache/arrow-datafusion/pull/8985) (yyy1000) +- Dont call multiunzip when no stats [#9220](https://github.com/apache/arrow-datafusion/pull/9220) (matthewmturner) +- Use setup-macos-aarch64-builder for aarch64 CI pipeline 
[#9242](https://github.com/apache/arrow-datafusion/pull/9242) (viirya) +- GROUP-BY prioritizes input columns in case of ambiguity [#9228](https://github.com/apache/arrow-datafusion/pull/9228) (jonahgao) +- Minor: chore: improve catalog test in mod.rs [#9244](https://github.com/apache/arrow-datafusion/pull/9244) (caicancai) +- Add example for `ScalarStructBuilder::new_null`, fix display for `null` `ScalarValue::Struct` [#9238](https://github.com/apache/arrow-datafusion/pull/9238) (alamb) diff --git a/dev/update_datafusion_versions.py b/dev/update_datafusion_versions.py index 19701b813671..12b0a90d4ab6 100755 --- a/dev/update_datafusion_versions.py +++ b/dev/update_datafusion_versions.py @@ -28,20 +28,22 @@ import tomlkit crates = { - 'datafusion': 'datafusion/core/Cargo.toml', - 'datafusion-cli': 'datafusion-cli/Cargo.toml', 'datafusion-common': 'datafusion/common/Cargo.toml', - 'datafusion-expr': 'datafusion/expr/Cargo.toml', + 'datafusion': 'datafusion/core/Cargo.toml', 'datafusion-execution': 'datafusion/execution/Cargo.toml', + 'datafusion-expr': 'datafusion/expr/Cargo.toml', + 'datafusion-functions': 'datafusion/functions/Cargo.toml', + 'datafusion-functions-array': 'datafusion/functions-array/Cargo.toml', 'datafusion-optimizer': 'datafusion/optimizer/Cargo.toml', 'datafusion-physical-expr': 'datafusion/physical-expr/Cargo.toml', 'datafusion-physical-plan': 'datafusion/physical-plan/Cargo.toml', 'datafusion-proto': 'datafusion/proto/Cargo.toml', - 'datafusion-substrait': 'datafusion/substrait/Cargo.toml', 'datafusion-sql': 'datafusion/sql/Cargo.toml', 'datafusion-sqllogictest': 'datafusion/sqllogictest/Cargo.toml', + 'datafusion-substrait': 'datafusion/substrait/Cargo.toml', 'datafusion-wasmtest': 'datafusion/wasmtest/Cargo.toml', 'datafusion-benchmarks': 'benchmarks/Cargo.toml', + 'datafusion-cli': 'datafusion-cli/Cargo.toml', 'datafusion-examples': 'datafusion-examples/Cargo.toml', 'datafusion-docs': 'docs/Cargo.toml', } @@ -55,9 +57,18 @@ def 
update_workspace_version(new_version: str): doc = tomlkit.parse(data) pkg = doc.get('workspace').get('package') - print('workspace pacakge', pkg) + print('workspace package', pkg) pkg['version'] = new_version + doc = tomlkit.parse(data) + + for crate in crates.keys(): + df_dep = doc.get('workspace').get('dependencies', {}).get(crate) + # skip crates that pin datafusion using git hash + if df_dep is not None and df_dep.get('version') is not None: + print(f'updating {crate} dependency in {cargo_toml}') + df_dep['version'] = new_version + with open(cargo_toml, 'w') as f: f.write(tomlkit.dumps(doc)) diff --git a/docs/Cargo.toml b/docs/Cargo.toml index 7eecd11df80b..39f4520ff7b2 100644 --- a/docs/Cargo.toml +++ b/docs/Cargo.toml @@ -29,4 +29,4 @@ authors = { workspace = true } rust-version = { workspace = true } [dependencies] -datafusion = { path = "../datafusion/core", version = "35.0.0", default-features = false } +datafusion = { path = "../datafusion/core", version = "36.0.0", default-features = false } diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 8b039102d4d7..081eb44230f9 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -64,7 +64,7 @@ Environment variables are read during `SessionConfig` initialisation so they mus | datafusion.execution.parquet.statistics_enabled | NULL | Sets if statistics are enabled for any column Valid values are: "none", "chunk", and "page" These values are not case sensitive. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_statistics_size | NULL | Sets max statistics size for any column. If NULL, uses default parquet writer setting | | datafusion.execution.parquet.max_row_group_size | 1048576 | Target maximum number of rows in each row group (defaults to 1M rows). Writing larger row groups requires more memory to write, but can get better compression and be faster to read. 
| -| datafusion.execution.parquet.created_by | datafusion version 35.0.0 | Sets "created by" property | +| datafusion.execution.parquet.created_by | datafusion version 36.0.0 | Sets "created by" property | | datafusion.execution.parquet.column_index_truncate_length | NULL | Sets column index truncate length | | datafusion.execution.parquet.data_page_row_count_limit | 18446744073709551615 | Sets best effort maximum number of rows in data page | | datafusion.execution.parquet.encoding | NULL | Sets default encoding for any column Valid values are: plain, plain_dictionary, rle, bit_packed, delta_binary_packed, delta_length_byte_array, delta_byte_array, rle_dictionary, and byte_stream_split. These values are not case sensitive. If NULL, uses default parquet writer setting |