From 8da364979165a106d92014fea25c139614374d22 Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Thu, 11 Jul 2024 22:27:19 +0800 Subject: [PATCH 1/3] remove termtree dependency --- datafusion/physical-plan/Cargo.toml | 1 - .../physical-plan/src/aggregates/topk/heap.rs | 54 +++++++++++++------ 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index f5f756417ebf..00fc81ebde97 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -66,7 +66,6 @@ tokio = { workspace = true } [dev-dependencies] rstest = { workspace = true } rstest_reuse = "0.7.0" -termtree = "0.5.0" tokio = { workspace = true, features = [ "rt-multi-thread", "fs", diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index 51593f5c28ce..58d5777da2c5 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -324,28 +324,52 @@ impl TopKHeap { } #[cfg(test)] - fn _tree_print(&self, idx: usize) -> Option> { - let hi = self.heap.get(idx)?; - match hi { - None => None, - Some(hi) => { - let label = - format!("val={:?} idx={}, bucket={}", hi.val, idx, hi.map_idx); - let left = self._tree_print(idx * 2 + 1); - let right = self._tree_print(idx * 2 + 2); - let children = left.into_iter().chain(right); - let me = termtree::Tree::new(label).with_leaves(children); - Some(me) + fn _tree_print( + &self, + idx: usize, + prefix: String, + is_tail: bool, + output: &mut String, + ) { + if let Some(Some(hi)) = self.heap.get(idx) { + let connector = if idx != 0 { + if is_tail { + "└── " + } else { + "├── " + } + } else { + "" + }; + output.push_str(&format!( + "{}{}val={:?} idx={}, bucket={}\n", + prefix, connector, hi.val, idx, hi.map_idx + )); + let new_prefix = if is_tail { "" } else { "│ " }; + let child_prefix = format!("{}{}", prefix, new_prefix); + + let left_idx = idx * 2 + 1; + let right_idx = idx * 2 + 2; + + let left_exists = left_idx < self.len; + let right_exists = right_idx < self.len; + + if left_exists { + self._tree_print(left_idx, child_prefix.clone(), !right_exists, output); + } + if right_exists { + self._tree_print(right_idx, child_prefix, true, output); } } } #[cfg(test)] fn tree_print(&self) -> String { - match self._tree_print(0) { - None => "".to_string(), - Some(root) => format!("{}", root), + let mut output = String::new(); + if self.heap.first().is_some() { + self._tree_print(0, String::new(), true, &mut output); } + output } } From fa0deb5c0110027c059903c2e565cc20f8a250fe Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Fri, 12 Jul 2024 11:36:27 +0800 Subject: [PATCH 2/3] impl Display for TopKHeap, replace uses of tree_print in tests --- .../physical-plan/src/aggregates/topk/heap.rs | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index 58d5777da2c5..dfd56e3fd687 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -27,7 +27,7 @@ use datafusion_common::Result; use datafusion_physical_expr::aggregate::utils::adjust_output_array; use half::f16; use std::cmp::Ordering; -use std::fmt::{Debug, Formatter}; +use std::fmt::{Debug, Display, Formatter}; use std::sync::Arc; /// A custom version of `Ord` that only exists to we can implement it for the Values in our heap @@ -323,7 +323,6 @@ impl TopKHeap { } } - #[cfg(test)] fn _tree_print( &self, idx: usize, @@ -362,14 +361,15 @@ impl TopKHeap { } } } +} - #[cfg(test)] - fn tree_print(&self) -> String { +impl Display for TopKHeap { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { let mut output = String::new(); if self.heap.first().is_some() { self._tree_print(0, String::new(), true, &mut output); } - output + write!(f, "{}", output) } } @@ -385,9 +385,9 @@ impl HeapItem { impl Debug for HeapItem { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str("bucket=")?; - self.map_idx.fmt(f)?; + Debug::fmt(&self.map_idx, f)?; f.write_str(" val=")?; - self.val.fmt(f)?; + Debug::fmt(&self.val, f)?; f.write_str("\n")?; Ok(()) } @@ -486,7 +486,7 @@ mod tests { let mut heap = TopKHeap::new(10, false); heap.append_or_replace(1, 1, &mut map); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=1 idx=0, bucket=1 "#; @@ -506,7 +506,7 @@ val=1 idx=0, bucket=1 heap.append_or_replace(2, 2, &mut map); assert_eq!(map, vec![(2, 0), (1, 1)]); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -524,7 +524,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); heap.append_or_replace(3, 3, &mut map); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=3 idx=0, bucket=3 ├── val=1 idx=1, bucket=1 @@ -534,7 +534,7 @@ val=3 idx=0, bucket=3 let mut map = vec![]; heap.append_or_replace(0, 0, &mut map); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=2 idx=0, bucket=2 ├── val=1 idx=1, bucket=1 @@ -555,7 +555,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(2, 2, &mut map); heap.append_or_replace(3, 3, &mut map); heap.append_or_replace(4, 4, &mut map); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=4 idx=0, bucket=4 ├── val=3 idx=1, bucket=3 @@ -566,7 +566,7 @@ val=4 idx=0, bucket=4 let mut map = vec![]; heap.replace_if_better(1, 0, &mut map); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=4 idx=0, bucket=4 ├── val=1 idx=1, bucket=1 @@ -587,7 +587,7 @@ val=4 idx=0, bucket=4 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -608,7 +608,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -631,7 +631,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -640,7 +640,7 @@ val=2 idx=0, bucket=2 let numbers = vec![(0, 1), (1, 2)]; heap.renumber(numbers.as_slice()); - let actual = heap.tree_print(); + let actual = format!("{heap}"); let expected = r#" val=2 idx=0, bucket=1 └── val=1 idx=1, bucket=2 From 0abfe6a50fb3029aed145983d2b105e3d546ef5c Mon Sep 17 00:00:00 2001 From: kf zheng <100595273+kev1n8@users.noreply.github.com> Date: Fri, 12 Jul 2024 19:13:51 +0800 Subject: [PATCH 3/3] use to_string instead of format! --- .../physical-plan/src/aggregates/topk/heap.rs | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index dfd56e3fd687..81eadbc018b3 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -486,7 +486,7 @@ mod tests { let mut heap = TopKHeap::new(10, false); heap.append_or_replace(1, 1, &mut map); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=1 idx=0, bucket=1 "#; @@ -506,7 +506,7 @@ val=1 idx=0, bucket=1 heap.append_or_replace(2, 2, &mut map); assert_eq!(map, vec![(2, 0), (1, 1)]); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -524,7 +524,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); heap.append_or_replace(3, 3, &mut map); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=3 idx=0, bucket=3 ├── val=1 idx=1, bucket=1 @@ -534,7 +534,7 @@ val=3 idx=0, bucket=3 let mut map = vec![]; heap.append_or_replace(0, 0, &mut map); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 ├── val=1 idx=1, bucket=1 @@ -555,7 +555,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(2, 2, &mut map); heap.append_or_replace(3, 3, &mut map); heap.append_or_replace(4, 4, &mut map); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=4 idx=0, bucket=4 ├── val=3 idx=1, bucket=3 @@ -566,7 +566,7 @@ val=4 idx=0, bucket=4 let mut map = vec![]; heap.replace_if_better(1, 0, &mut map); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=4 idx=0, bucket=4 ├── val=1 idx=1, bucket=1 @@ -587,7 +587,7 @@ val=4 idx=0, bucket=4 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -608,7 +608,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -631,7 +631,7 @@ val=2 idx=0, bucket=2 heap.append_or_replace(1, 1, &mut map); heap.append_or_replace(2, 2, &mut map); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=2 └── val=1 idx=1, bucket=1 @@ -640,7 +640,7 @@ val=2 idx=0, bucket=2 let numbers = vec![(0, 1), (1, 2)]; heap.renumber(numbers.as_slice()); - let actual = format!("{heap}"); + let actual = heap.to_string(); let expected = r#" val=2 idx=0, bucket=1 └── val=1 idx=1, bucket=2