From 4ea536b70cd03d356e9b281f50b70c01ee4a462c Mon Sep 17 00:00:00 2001 From: SteveLauC Date: Sun, 3 Mar 2024 15:03:20 +0800 Subject: [PATCH] refactor: make instr() an alias of strpos() (#9396) --- datafusion-cli/Cargo.lock | 112 +++++------ .../tests/dataframe/dataframe_functions.rs | 20 -- datafusion/expr/src/built_in_function.rs | 12 +- datafusion/expr/src/expr_fn.rs | 2 - datafusion/physical-expr/src/functions.rs | 179 ++++++++---------- datafusion/proto/proto/datafusion.proto | 2 +- datafusion/proto/src/generated/pbjson.rs | 3 - datafusion/proto/src/generated/prost.rs | 4 +- .../proto/src/logical_plan/from_proto.rs | 9 +- datafusion/proto/src/logical_plan/to_proto.rs | 1 - datafusion/sql/src/expr/mod.rs | 2 +- datafusion/sqllogictest/test_files/scalar.slt | 2 +- .../source/user-guide/sql/scalar_functions.md | 16 +- 13 files changed, 153 insertions(+), 211 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index 40f9bd9dd4b5..26805b709b92 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -270,7 +270,7 @@ dependencies = [ "arrow-schema", "chrono", "half", - "indexmap 2.2.3", + "indexmap 2.2.4", "lexical-core", "num", "serde", @@ -384,7 +384,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -874,7 +874,7 @@ dependencies = [ "iana-time-zone", "num-traits", "serde", - "windows-targets 0.52.3", + "windows-targets 0.52.4", ] [[package]] @@ -1073,7 +1073,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ad291aa74992b9b7a7e88c38acbbf6ad7e107f1d90ee8775b7bc1fc3394f485c" dependencies = [ "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -1126,7 +1126,7 @@ dependencies = [ "glob", "half", "hashbrown 0.14.3", - "indexmap 2.2.3", + "indexmap 2.2.4", "itertools", "log", "num-traits", @@ -1299,7 +1299,7 @@ dependencies = [ "half", "hashbrown 0.14.3", "hex", - "indexmap 2.2.3", + "indexmap 2.2.4", "itertools", "log", "md-5", @@ -1331,7 +1331,7 @@ dependencies = [ "futures", "half", "hashbrown 0.14.3", - "indexmap 2.2.3", + "indexmap 2.2.4", "itertools", "log", "once_cell", @@ -1619,7 +1619,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -1703,7 +1703,7 @@ dependencies = [ "futures-sink", "futures-util", "http", - "indexmap 2.2.3", + "indexmap 2.2.4", "slab", "tokio", "tokio-util", @@ -1763,9 +1763,9 @@ dependencies = [ [[package]] name = "hermit-abi" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "379dada1584ad501b383485dd706b8afb7a70fcbc7f4da7d780638a5a6124a60" +checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hex" @@ -1920,9 +1920,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.3" +version = "2.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "233cf39063f058ea2caae4091bf4a3ef70a653afbc026f5c4a4135d114e3c177" +checksum = "967d6dd42f16dbf0eb8040cb9e477933562684d3918f7d253f2ff9087fb3e7a3" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -2121,9 +2121,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.20" +version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" [[package]] name = "lz4_flex" @@ -2310,7 +2310,7 @@ version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" dependencies = [ - "hermit-abi 0.3.8", + "hermit-abi 0.3.9", "libc", ] @@ -2472,7 +2472,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.2.3", + "indexmap 2.2.4", ] [[package]] @@ -2530,7 +2530,7 @@ checksum = "266c042b60c9c76b8d53061e52b2e0d1116abc57cefc8c5cd671619a56ac3690" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3071,7 +3071,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3206,7 +3206,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3252,7 +3252,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3265,7 +3265,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3287,9 +3287,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.51" +version = "2.0.52" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ab617d94515e94ae53b8406c628598680aa0c9587474ecbe58188f7b345d66c" +checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" dependencies = [ "proc-macro2", "quote", @@ -3373,7 +3373,7 @@ checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3468,7 +3468,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3565,7 +3565,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3610,7 +3610,7 @@ checksum = "f03ca4cb38206e2bef0700092660bb74d696f808514dae47fa1467cbfe26e96e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] @@ -3764,7 +3764,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", "wasm-bindgen-shared", ] @@ -3798,7 +3798,7 @@ checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3879,7 +3879,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets 0.52.3", + "windows-targets 0.52.4", ] [[package]] @@ -3897,7 +3897,7 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets 0.52.3", + "windows-targets 0.52.4", ] [[package]] @@ -3917,17 +3917,17 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d380ba1dc7187569a8a9e91ed34b8ccfc33123bbacb8c0aed2d1ad7f3ef2dc5f" +checksum = "7dd37b7e5ab9018759f893a1952c9420d060016fc19a472b4bb20d1bdd694d1b" dependencies = [ - "windows_aarch64_gnullvm 0.52.3", - "windows_aarch64_msvc 0.52.3", - "windows_i686_gnu 0.52.3", - "windows_i686_msvc 0.52.3", - "windows_x86_64_gnu 0.52.3", - "windows_x86_64_gnullvm 0.52.3", - "windows_x86_64_msvc 0.52.3", + "windows_aarch64_gnullvm 0.52.4", + "windows_aarch64_msvc 0.52.4", + "windows_i686_gnu 0.52.4", + "windows_i686_msvc 0.52.4", + "windows_x86_64_gnu 0.52.4", + "windows_x86_64_gnullvm 0.52.4", + "windows_x86_64_msvc 0.52.4", ] [[package]] @@ -3938,9 +3938,9 @@ checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" [[package]] name = "windows_aarch64_gnullvm" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68e5dcfb9413f53afd9c8f86e56a7b4d86d9a2fa26090ea2dc9e40fba56c6ec6" +checksum = "bcf46cf4c365c6f2d1cc93ce535f2c8b244591df96ceee75d8e83deb70a9cac9" [[package]] name = "windows_aarch64_msvc" @@ -3950,9 +3950,9 @@ checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" [[package]] name = "windows_aarch64_msvc" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8dab469ebbc45798319e69eebf92308e541ce46760b49b18c6b3fe5e8965b30f" +checksum = "da9f259dd3bcf6990b55bffd094c4f7235817ba4ceebde8e6d11cd0c5633b675" [[package]] name = "windows_i686_gnu" @@ -3962,9 +3962,9 @@ checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" [[package]] name = "windows_i686_gnu" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a4e9b6a7cac734a8b4138a4e1044eac3404d8326b6c0f939276560687a033fb" +checksum = "b474d8268f99e0995f25b9f095bc7434632601028cf86590aea5c8a5cb7801d3" [[package]] name = "windows_i686_msvc" @@ -3974,9 +3974,9 @@ checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" [[package]] name = "windows_i686_msvc" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b0ec9c422ca95ff34a78755cfa6ad4a51371da2a5ace67500cf7ca5f232c58" +checksum = "1515e9a29e5bed743cb4415a9ecf5dfca648ce85ee42e15873c3cd8610ff8e02" [[package]] name = "windows_x86_64_gnu" @@ -3986,9 +3986,9 @@ checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" [[package]] name = "windows_x86_64_gnu" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "704131571ba93e89d7cd43482277d6632589b18ecf4468f591fbae0a8b101614" +checksum = "5eee091590e89cc02ad514ffe3ead9eb6b660aedca2183455434b93546371a03" [[package]] name = "windows_x86_64_gnullvm" @@ -3998,9 +3998,9 @@ checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" [[package]] name = "windows_x86_64_gnullvm" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42079295511643151e98d61c38c0acc444e52dd42ab456f7ccfd5152e8ecf21c" +checksum = "77ca79f2451b49fa9e2af39f0747fe999fcda4f5e241b2898624dca97a1f2177" [[package]] name = "windows_x86_64_msvc" @@ -4010,9 +4010,9 @@ checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" [[package]] name = "windows_x86_64_msvc" -version = "0.52.3" +version = "0.52.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0770833d60a970638e989b3fa9fd2bb1aaadcf88963d1659fd7d9990196ed2d6" +checksum = "32b752e52a2da0ddfbdbcc6fceadfeede4c939ed16d13e648833a61dfb611ed8" [[package]] name = "winreg" @@ -4056,7 +4056,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.51", + "syn 2.0.52", ] [[package]] diff --git a/datafusion/core/tests/dataframe/dataframe_functions.rs b/datafusion/core/tests/dataframe/dataframe_functions.rs index ff553a48888b..8bb23e96e0e2 100644 --- a/datafusion/core/tests/dataframe/dataframe_functions.rs +++ b/datafusion/core/tests/dataframe/dataframe_functions.rs @@ -282,26 +282,6 @@ async fn test_fn_initcap() -> Result<()> { Ok(()) } -#[tokio::test] -async fn test_fn_instr() -> Result<()> { - let expr = instr(col("a"), lit("b")); - - let expected = [ - "+-------------------------+", - "| instr(test.a,Utf8(\"b\")) |", - "+-------------------------+", - "| 2 |", - "| 2 |", - "| 0 |", - "| 5 |", - "+-------------------------+", - ]; - - assert_fn_batches!(expr, expected); - - Ok(()) -} - #[tokio::test] #[cfg(feature = "unicode_expressions")] async fn test_fn_left() -> Result<()> { diff --git a/datafusion/expr/src/built_in_function.rs b/datafusion/expr/src/built_in_function.rs index e658cde4dd18..c04d86715693 100644 --- a/datafusion/expr/src/built_in_function.rs +++ b/datafusion/expr/src/built_in_function.rs @@ -204,8 +204,6 @@ pub enum BuiltinScalarFunction { EndsWith, /// initcap InitCap, - /// InStr - InStr, /// left Left, /// lpad @@ -414,7 +412,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::DateBin => Volatility::Immutable, BuiltinScalarFunction::EndsWith => Volatility::Immutable, BuiltinScalarFunction::InitCap => Volatility::Immutable, - BuiltinScalarFunction::InStr => Volatility::Immutable, BuiltinScalarFunction::Left => Volatility::Immutable, BuiltinScalarFunction::Lpad => Volatility::Immutable, BuiltinScalarFunction::Lower => Volatility::Immutable, @@ -665,9 +662,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::InitCap => { utf8_to_str_type(&input_expr_types[0], "initcap") } - BuiltinScalarFunction::InStr => { - utf8_to_int_type(&input_expr_types[0], "instr/position") - } BuiltinScalarFunction::Left => utf8_to_str_type(&input_expr_types[0], "left"), BuiltinScalarFunction::Lower => { utf8_to_str_type(&input_expr_types[0], "lower") @@ -728,7 +722,7 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::StartsWith => Ok(Boolean), BuiltinScalarFunction::EndsWith => Ok(Boolean), BuiltinScalarFunction::Strpos => { - utf8_to_int_type(&input_expr_types[0], "strpos") + utf8_to_int_type(&input_expr_types[0], "strpos/instr/position") } BuiltinScalarFunction::Substr => { utf8_to_str_type(&input_expr_types[0], "substr") @@ -1143,7 +1137,6 @@ impl BuiltinScalarFunction { ), BuiltinScalarFunction::EndsWith - | BuiltinScalarFunction::InStr | BuiltinScalarFunction::Strpos | BuiltinScalarFunction::StartsWith => Signature::one_of( vec![ @@ -1399,7 +1392,6 @@ impl BuiltinScalarFunction { BuiltinScalarFunction::Chr => &["chr"], BuiltinScalarFunction::EndsWith => &["ends_with"], BuiltinScalarFunction::InitCap => &["initcap"], - BuiltinScalarFunction::InStr => &["instr", "position"], BuiltinScalarFunction::Left => &["left"], BuiltinScalarFunction::Lower => &["lower"], BuiltinScalarFunction::Lpad => &["lpad"], @@ -1416,7 +1408,7 @@ impl BuiltinScalarFunction { &["string_to_array", "string_to_list"] } BuiltinScalarFunction::StartsWith => &["starts_with"], - BuiltinScalarFunction::Strpos => &["strpos"], + BuiltinScalarFunction::Strpos => &["strpos", "instr", "position"], BuiltinScalarFunction::Substr => &["substr"], BuiltinScalarFunction::ToHex => &["to_hex"], BuiltinScalarFunction::Translate => &["translate"], diff --git a/datafusion/expr/src/expr_fn.rs b/datafusion/expr/src/expr_fn.rs index 07844c27fe50..ba8b76ac6fac 100644 --- a/datafusion/expr/src/expr_fn.rs +++ b/datafusion/expr/src/expr_fn.rs @@ -767,7 +767,6 @@ scalar_expr!( ); scalar_expr!(Digest, digest, input algorithm, "compute the binary hash of `input`, using the `algorithm`"); scalar_expr!(InitCap, initcap, string, "converts the first letter of each word in `string` in uppercase and the remaining characters in lowercase"); -scalar_expr!(InStr, instr, string substring, "returns the position of the first occurrence of `substring` in `string`"); scalar_expr!(Left, left, string n, "returns the first `n` characters in the `string`"); scalar_expr!(Lower, lower, string, "convert the string to lower case"); scalar_expr!( @@ -1313,7 +1312,6 @@ mod test { test_scalar_expr!(Gcd, gcd, arg_1, arg_2); test_scalar_expr!(Lcm, lcm, arg_1, arg_2); test_scalar_expr!(InitCap, initcap, string); - test_scalar_expr!(InStr, instr, string, substring); test_scalar_expr!(Left, left, string, count); test_scalar_expr!(Lower, lower, string); test_nary_scalar_expr!(Lpad, lpad, string, count); diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs index d9d1d704db2e..14ab25e96173 100644 --- a/datafusion/physical-expr/src/functions.rs +++ b/datafusion/physical-expr/src/functions.rs @@ -510,15 +510,6 @@ pub fn create_physical_fun( exec_err!("Unsupported data type {other:?} for function initcap") } }), - BuiltinScalarFunction::InStr => Arc::new(|args| match args[0].data_type() { - DataType::Utf8 => { - make_scalar_function_inner(string_expressions::instr::)(args) - } - DataType::LargeUtf8 => { - make_scalar_function_inner(string_expressions::instr::)(args) - } - other => exec_err!("Unsupported data type {other:?} for function instr"), - }), BuiltinScalarFunction::Left => Arc::new(|args| match args[0].data_type() { DataType::Utf8 => { let func = invoke_if_unicode_expressions_feature_flag!(left, i32, "left"); @@ -1355,95 +1346,6 @@ mod tests { Utf8, StringArray ); - test_function!( - InStr, - &[lit("abc"), lit("b")], - Ok(Some(2)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("abc"), lit("c")], - Ok(Some(3)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("abc"), lit("d")], - Ok(Some(0)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("abc"), lit("")], - Ok(Some(1)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("Helloworld"), lit("world")], - Ok(Some(6)), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit("Helloworld"), lit(ScalarValue::Utf8(None))], - Ok(None), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[lit(ScalarValue::Utf8(None)), lit("Hello")], - Ok(None), - i32, - Int32, - Int32Array - ); - test_function!( - InStr, - &[ - lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), - lit(ScalarValue::LargeUtf8(Some("world".to_string()))) - ], - Ok(Some(6)), - i64, - Int64, - Int64Array - ); - test_function!( - InStr, - &[ - lit(ScalarValue::LargeUtf8(None)), - lit(ScalarValue::LargeUtf8(Some("world".to_string()))) - ], - Ok(None), - i64, - Int64, - Int64Array - ); - test_function!( - InStr, - &[ - lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), - lit(ScalarValue::LargeUtf8(None)) - ], - Ok(None), - i64, - Int64, - Int64Array - ); #[cfg(feature = "unicode_expressions")] test_function!( Left, @@ -2604,6 +2506,87 @@ mod tests { Int32Array ); #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit("abc"), lit("d")], + Ok(Some(0)), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit("abc"), lit("")], + Ok(Some(1)), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit("Helloworld"), lit("world")], + Ok(Some(6)), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit("Helloworld"), lit(ScalarValue::Utf8(None))], + Ok(None), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[lit(ScalarValue::Utf8(None)), lit("Hello")], + Ok(None), + i32, + Int32, + Int32Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[ + lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), + lit(ScalarValue::LargeUtf8(Some("world".to_string()))) + ], + Ok(Some(6)), + i64, + Int64, + Int64Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[ + lit(ScalarValue::LargeUtf8(None)), + lit(ScalarValue::LargeUtf8(Some("world".to_string()))) + ], + Ok(None), + i64, + Int64, + Int64Array + ); + #[cfg(feature = "unicode_expressions")] + test_function!( + Strpos, + &[ + lit(ScalarValue::LargeUtf8(Some("Helloworld".to_string()))), + lit(ScalarValue::LargeUtf8(None)) + ], + Ok(None), + i64, + Int64, + Int64Array + ); + #[cfg(feature = "unicode_expressions")] test_function!( Strpos, &[lit("josé"), lit("é"),], diff --git a/datafusion/proto/proto/datafusion.proto b/datafusion/proto/proto/datafusion.proto index ef2db5a75d57..526ee7704b5b 100644 --- a/datafusion/proto/proto/datafusion.proto +++ b/datafusion/proto/proto/datafusion.proto @@ -678,7 +678,7 @@ enum ScalarFunction { ArrayDistinct = 129; ArrayResize = 130; EndsWith = 131; - InStr = 132; + /// 132 was InStr MakeDate = 133; ArrayReverse = 134; RegexpLike = 135; diff --git a/datafusion/proto/src/generated/pbjson.rs b/datafusion/proto/src/generated/pbjson.rs index e9e1f270864a..56f0880074ea 100644 --- a/datafusion/proto/src/generated/pbjson.rs +++ b/datafusion/proto/src/generated/pbjson.rs @@ -22436,7 +22436,6 @@ impl serde::Serialize for ScalarFunction { Self::ArrayDistinct => "ArrayDistinct", Self::ArrayResize => "ArrayResize", Self::EndsWith => "EndsWith", - Self::InStr => "InStr", Self::MakeDate => "MakeDate", Self::ArrayReverse => "ArrayReverse", Self::RegexpLike => "RegexpLike", @@ -22567,7 +22566,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayDistinct", "ArrayResize", "EndsWith", - "InStr", "MakeDate", "ArrayReverse", "RegexpLike", @@ -22727,7 +22725,6 @@ impl<'de> serde::Deserialize<'de> for ScalarFunction { "ArrayDistinct" => Ok(ScalarFunction::ArrayDistinct), "ArrayResize" => Ok(ScalarFunction::ArrayResize), "EndsWith" => Ok(ScalarFunction::EndsWith), - "InStr" => Ok(ScalarFunction::InStr), "MakeDate" => Ok(ScalarFunction::MakeDate), "ArrayReverse" => Ok(ScalarFunction::ArrayReverse), "RegexpLike" => Ok(ScalarFunction::RegexpLike), diff --git a/datafusion/proto/src/generated/prost.rs b/datafusion/proto/src/generated/prost.rs index eca0156dd223..187085454a7f 100644 --- a/datafusion/proto/src/generated/prost.rs +++ b/datafusion/proto/src/generated/prost.rs @@ -2767,7 +2767,7 @@ pub enum ScalarFunction { ArrayDistinct = 129, ArrayResize = 130, EndsWith = 131, - InStr = 132, + /// / 132 was InStr MakeDate = 133, ArrayReverse = 134, RegexpLike = 135, @@ -2896,7 +2896,6 @@ impl ScalarFunction { ScalarFunction::ArrayDistinct => "ArrayDistinct", ScalarFunction::ArrayResize => "ArrayResize", ScalarFunction::EndsWith => "EndsWith", - ScalarFunction::InStr => "InStr", ScalarFunction::MakeDate => "MakeDate", ScalarFunction::ArrayReverse => "ArrayReverse", ScalarFunction::RegexpLike => "RegexpLike", @@ -3021,7 +3020,6 @@ impl ScalarFunction { "ArrayDistinct" => Some(Self::ArrayDistinct), "ArrayResize" => Some(Self::ArrayResize), "EndsWith" => Some(Self::EndsWith), - "InStr" => Some(Self::InStr), "MakeDate" => Some(Self::MakeDate), "ArrayReverse" => Some(Self::ArrayReverse), "RegexpLike" => Some(Self::RegexpLike), diff --git a/datafusion/proto/src/logical_plan/from_proto.rs b/datafusion/proto/src/logical_plan/from_proto.rs index 29e51e03730e..327902c98b73 100644 --- a/datafusion/proto/src/logical_plan/from_proto.rs +++ b/datafusion/proto/src/logical_plan/from_proto.rs @@ -57,8 +57,8 @@ use datafusion_expr::{ current_date, current_time, date_bin, date_part, date_trunc, degrees, digest, ends_with, exp, expr::{self, InList, Sort, WindowFunction}, - factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, instr, iszero, - lcm, left, levenshtein, ln, log, log10, log2, + factorial, find_in_set, flatten, floor, from_unixtime, gcd, initcap, iszero, lcm, + left, levenshtein, ln, log, log10, log2, logical_plan::{PlanType, StringifiedPlan}, lower, lpad, ltrim, md5, nanvl, now, octet_length, overlay, pi, power, radians, random, regexp_like, regexp_replace, repeat, replace, reverse, right, round, rpad, @@ -527,7 +527,6 @@ impl From<&protobuf::ScalarFunction> for BuiltinScalarFunction { ScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator, ScalarFunction::EndsWith => Self::EndsWith, ScalarFunction::InitCap => Self::InitCap, - ScalarFunction::InStr => Self::InStr, ScalarFunction::Left => Self::Left, ScalarFunction::Lpad => Self::Lpad, ScalarFunction::Random => Self::Random, @@ -1651,10 +1650,6 @@ pub fn parse_expr( ScalarFunction::InitCap => { Ok(initcap(parse_expr(&args[0], registry, codec)?)) } - ScalarFunction::InStr => Ok(instr( - parse_expr(&args[0], registry, codec)?, - parse_expr(&args[1], registry, codec)?, - )), ScalarFunction::Gcd => Ok(gcd( parse_expr(&args[0], registry, codec)?, parse_expr(&args[1], registry, codec)?, diff --git a/datafusion/proto/src/logical_plan/to_proto.rs b/datafusion/proto/src/logical_plan/to_proto.rs index 424e1414af11..ad618790a55c 100644 --- a/datafusion/proto/src/logical_plan/to_proto.rs +++ b/datafusion/proto/src/logical_plan/to_proto.rs @@ -1504,7 +1504,6 @@ impl TryFrom<&BuiltinScalarFunction> for protobuf::ScalarFunction { BuiltinScalarFunction::ConcatWithSeparator => Self::ConcatWithSeparator, BuiltinScalarFunction::EndsWith => Self::EndsWith, BuiltinScalarFunction::InitCap => Self::InitCap, - BuiltinScalarFunction::InStr => Self::InStr, BuiltinScalarFunction::Left => Self::Left, BuiltinScalarFunction::Lpad => Self::Lpad, BuiltinScalarFunction::Random => Self::Random, diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs index b058fb79b4a1..d36d973cbee6 100644 --- a/datafusion/sql/src/expr/mod.rs +++ b/datafusion/sql/src/expr/mod.rs @@ -750,7 +750,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { schema: &DFSchema, planner_context: &mut PlannerContext, ) -> Result { - let fun = BuiltinScalarFunction::InStr; + let fun = BuiltinScalarFunction::Strpos; let substr = self.sql_expr_to_logical_expr(substr_expr, schema, planner_context)?; let fullstr = self.sql_expr_to_logical_expr(str_expr, schema, planner_context)?; diff --git a/datafusion/sqllogictest/test_files/scalar.slt b/datafusion/sqllogictest/test_files/scalar.slt index 48b5a0af7253..5ff253c1a34a 100644 --- a/datafusion/sqllogictest/test_files/scalar.slt +++ b/datafusion/sqllogictest/test_files/scalar.slt @@ -2042,5 +2042,5 @@ select position('' in '') 1 -query error DataFusion error: Error during planning: The INSTR/POSITION function can only accept strings, but got Int64. +query error DataFusion error: Error during planning: The STRPOS/INSTR/POSITION function can only accept strings, but got Int64. select position(1 in 1) diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index cd1fbdabea1c..41cf81fe1ed4 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -832,12 +832,7 @@ initcap(str) ### `instr` -Returns the location where substr first appeared in str (counting from 1). -If substr does not appear in str, return 0. - -``` -instr(str, substr) -``` +_Alias of [strpos](#strpos)._ #### Arguments @@ -1108,6 +1103,10 @@ strpos(str, substr) - **substr**: Substring expression to search for. Can be a constant, column, or function, and any combination of string operators. +#### Aliases + +- instr + ### `substr` Extracts a substring of a specified number of characters from a specific @@ -1440,7 +1439,8 @@ Additional examples can be found [here](https://github.com/apache/arrow-datafusi ### `position` -Returns the position of substr in orig_str +Returns the position of `substr` in `origstr` (counting from 1). If `substr` does +not appear in `origstr`, return 0. ``` position(substr in origstr) @@ -1448,7 +1448,7 @@ position(substr in origstr) #### Arguments -- **substr**: he pattern string. +- **substr**: The pattern string. - **origstr**: The model string. ## Time and Date Functions