From d92897d5dec2b9fb37d81640a38641bc7f53dc31 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Sep 2024 09:19:01 -0400 Subject: [PATCH 1/5] Minor: improve performance of ScalarValue debug --- datafusion/common/src/scalar/mod.rs | 54 ++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 3cff0731dcee..b48247d57ec2 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3707,7 +3707,7 @@ impl fmt::Debug for ScalarValue { write!(f, "FixedSizeBinary({size}, {self})") } ScalarValue::FixedSizeBinary(size, Some(b)) => { - write!(f, "Binary({size}, \"")?; + write!(f, "FixedSizeBinary({size}, \"")?; fmt_binary(b.as_slice(), f)?; write!(f, "\")") } @@ -6571,6 +6571,58 @@ mod tests { assert_eq!(format!("{large_binary_value}"), "0102030405060708090A..."); } + #[test] + fn test_binary_debug() { + let no_binary_value = ScalarValue::Binary(None); + assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)"); + let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3])); + assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")"); + let large_binary_value = + ScalarValue::Binary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])); + assert_eq!( + format!("{large_binary_value:?}"), + "Binary(\"1,2,3,4,5,6,7,8,9,10,11\")" + ); + + let no_binary_value = ScalarValue::BinaryView(None); + assert_eq!(format!("{no_binary_value:?}"), "BinaryView(NULL)"); + let small_binary_value = ScalarValue::BinaryView(Some(vec![1u8, 2, 3])); + assert_eq!(format!("{small_binary_value:?}"), "BinaryView(\"1,2,3\")"); + let large_binary_value = + ScalarValue::BinaryView(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])); + assert_eq!( + format!("{large_binary_value:?}"), + "BinaryView(\"1,2,3,4,5,6,7,8,9,10,11\")" + ); + + let no_binary_value = ScalarValue::LargeBinary(None); + assert_eq!(format!("{no_binary_value:?}"), "LargeBinary(NULL)"); + let small_binary_value = ScalarValue::LargeBinary(Some(vec![1u8, 2, 3])); + assert_eq!(format!("{small_binary_value:?}"), "LargeBinary(\"1,2,3\")"); + let large_binary_value = + ScalarValue::LargeBinary(Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])); + assert_eq!( + format!("{large_binary_value:?}"), + "LargeBinary(\"1,2,3,4,5,6,7,8,9,10,11\")" + ); + + let no_binary_value = ScalarValue::FixedSizeBinary(3, None); + assert_eq!(format!("{no_binary_value:?}"), "FixedSizeBinary(3, NULL)"); + let small_binary_value = ScalarValue::FixedSizeBinary(3, Some(vec![1u8, 2, 3])); + assert_eq!( + format!("{small_binary_value:?}"), + "FixedSizeBinary(3, \"1,2,3\")" + ); + let large_binary_value = ScalarValue::FixedSizeBinary( + 11, + Some(vec![1u8, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]), + ); + assert_eq!( + format!("{large_binary_value:?}"), + "FixedSizeBinary(11, \"1,2,3,4,5,6,7,8,9,10,11\")" + ); + } + #[test] fn test_build_timestamp_millisecond_list() { let values = vec![ScalarValue::TimestampMillisecond(Some(1), None)]; From 3ed59813e8e4c80fb70209052f1955bdd88fab67 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 4 Sep 2024 09:26:23 -0400 Subject: [PATCH 2/5] reduce allocations --- datafusion/common/src/scalar/mod.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index b48247d57ec2..3e5e55bfdb60 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3646,18 +3646,22 @@ fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{value_formatter}") } +/// writes a byte array to formatter. `[1, 2, 3]` ==> `"1,2,3"` +fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result { + let mut first = true; + for b in data { + if !first { + write!(f, ",{b}")?; + } else { + write!(f, "{}", b)?; + } + first = false; + } + Ok(()) +} + impl fmt::Debug for ScalarValue { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{}", - data.iter() - .map(|v| format!("{v}")) - .collect::>() - .join(",") - ) - } match self { ScalarValue::Decimal128(_, _, _) => write!(f, "Decimal128({self})"), ScalarValue::Decimal256(_, _, _) => write!(f, "Decimal256({self})"), From 99a57e77b0ed6641982af65d8a33ef9b9a73296f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Sep 2024 07:39:08 -0400 Subject: [PATCH 3/5] simplify loop --- datafusion/common/src/scalar/mod.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 3e5e55bfdb60..d73ae7592047 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3648,14 +3648,12 @@ fn fmt_list(arr: ArrayRef, f: &mut fmt::Formatter) -> fmt::Result { /// writes a byte array to formatter. `[1, 2, 3]` ==> `"1,2,3"` fn fmt_binary(data: &[u8], f: &mut fmt::Formatter) -> fmt::Result { - let mut first = true; - for b in data { - if !first { - write!(f, ",{b}")?; - } else { - write!(f, "{}", b)?; - } - first = false; + let mut iter = data.iter(); + if let Some(b) = iter.next() { + write!(f, "{b}")?; + } + for b in iter { + write!(f, ",{b}")?; } Ok(()) } From 7017d520329c17046fb2bf5d98c1286b6210b21c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Sep 2024 07:40:34 -0400 Subject: [PATCH 4/5] Add test for single list --- datafusion/common/src/scalar/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index f6d20dae752e..dc76889f15da 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -6706,6 +6706,8 @@ mod tests { fn test_binary_debug() { let no_binary_value = ScalarValue::Binary(None); assert_eq!(format!("{no_binary_value:?}"), "Binary(NULL)"); + let single_binary_value = ScalarValue::Binary(Some(vec![42u8])); + assert_eq!(format!("{single_binary_value:?}"), "Binary(\"42\")"); let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3])); assert_eq!(format!("{small_binary_value:?}"), "Binary(\"1,2,3\")"); let large_binary_value = From a7e3f2a79693d213d640c1983c923bfaff8ec981 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Thu, 5 Sep 2024 07:41:30 -0400 Subject: [PATCH 5/5] Add another test for display --- datafusion/common/src/scalar/mod.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index dc76889f15da..0bd730a0a701 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -6669,6 +6669,8 @@ mod tests { fn test_binary_display() { let no_binary_value = ScalarValue::Binary(None); assert_eq!(format!("{no_binary_value}"), "NULL"); + let single_binary_value = ScalarValue::Binary(Some(vec![42u8])); + assert_eq!(format!("{single_binary_value}"), "2A"); let small_binary_value = ScalarValue::Binary(Some(vec![1u8, 2, 3])); assert_eq!(format!("{small_binary_value}"), "010203"); let large_binary_value =