From 48490eeffda4731417d1c607d49a985578cc80de Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 17 Jan 2024 07:06:21 -0500 Subject: [PATCH 1/3] Fix datafusion-cli print output --- datafusion-cli/src/print_format.rs | 82 +++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 2 deletions(-) diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index ea418562495d..5fdc57d6176b 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -161,7 +161,7 @@ impl PrintFormat { maxrows: MaxRows, with_header: bool, ) -> Result<()> { - if batches.is_empty() || batches[0].num_rows() == 0 { + if batches.is_empty() { return Ok(()); } @@ -189,7 +189,7 @@ mod tests { use super::*; - use arrow::array::Int32Array; + use arrow::array::{ArrayRef, Int32Array}; use arrow::datatypes::{DataType, Field, Schema}; use datafusion::error::Result; @@ -351,4 +351,82 @@ mod tests { Ok(()) } + + + #[test] + fn test_print_batches_empty_batches() -> Result<()> { + let batch = RecordBatch::try_from_iter( + vec![ + ("a", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef), + ], + )?; + let empty_batch = RecordBatch::new_empty(batch.schema()); + + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(vec![ + empty_batch.clone(), + batch, + empty_batch, + ]) + .with_expected( + &[ + "+---+", + "| a |", + "+---+", + "| 1 |", + "| 2 |", + "| 3 |", + "+---+\n", + ]) + .run(); + Ok(()) + } + + struct PrintBatchesTest { + format: PrintFormat, + batches: Vec, + maxrows: MaxRows, + with_header: bool, + expected: Vec<&'static str>, + } + + impl PrintBatchesTest { + fn new() -> Self { + Self { + format: PrintFormat::Table, + batches: vec![], + maxrows: MaxRows::Unlimited, + with_header: false, + expected: vec![] + } + } + + /// set the format + fn with_format(mut self, format: PrintFormat) -> Self { + self.format = format; + self + } + + /// set the batches to convert + fn with_batches(mut self, batches: Vec) -> Self { + self.batches = batches; + self + } + + /// set expected output + fn with_expected(mut self, expected: &[&'static str]) -> Self { + self.expected = expected.to_vec(); + self + } + + /// run the test + fn run(self) { + let mut buffer: Vec = vec![]; + self.format.print_batches(&mut buffer, &self.batches, self.maxrows, self.with_header).unwrap(); + let actual = String::from_utf8(buffer).unwrap(); + let expected = self.expected.join("\n"); + assert_eq!(actual, expected, "actual:\n\n{actual}expected:\n\n{expected}"); + } + } } From eae003176bbd496bb45d195d41ef18a11371272c Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 17 Jan 2024 07:17:51 -0500 Subject: [PATCH 2/3] fmt --- datafusion-cli/src/print_format.rs | 51 +++++++++++++++--------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index 5fdc57d6176b..142f3aae9c3f 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -352,35 +352,31 @@ mod tests { Ok(()) } - #[test] fn test_print_batches_empty_batches() -> Result<()> { - let batch = RecordBatch::try_from_iter( - vec![ - ("a", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef), - ], - )?; + let batch = RecordBatch::try_from_iter(vec![( + "a", + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + )])?; let empty_batch = RecordBatch::new_empty(batch.schema()); + #[rustfmt::skip] + let expected =&[ + "+---+", + "| a |", + "+---+", + "| 1 |", + "| 2 |", + "| 3 |", + "+---+\n", + ]; + PrintBatchesTest::new() .with_format(PrintFormat::Table) - .with_batches(vec![ - empty_batch.clone(), - batch, - empty_batch, - ]) - .with_expected( - &[ - "+---+", - "| a |", - "+---+", - "| 1 |", - "| 2 |", - "| 3 |", - "+---+\n", - ]) + .with_batches(vec![empty_batch.clone(), batch, empty_batch]) + .with_expected(expected) .run(); - Ok(()) + Ok(()) } struct PrintBatchesTest { @@ -398,7 +394,7 @@ mod tests { batches: vec![], maxrows: MaxRows::Unlimited, with_header: false, - expected: vec![] + expected: vec![], } } @@ -423,10 +419,15 @@ mod tests { /// run the test fn run(self) { let mut buffer: Vec = vec![]; - self.format.print_batches(&mut buffer, &self.batches, self.maxrows, self.with_header).unwrap(); + self.format + .print_batches(&mut buffer, &self.batches, self.maxrows, self.with_header) + .unwrap(); let actual = String::from_utf8(buffer).unwrap(); let expected = self.expected.join("\n"); - assert_eq!(actual, expected, "actual:\n\n{actual}expected:\n\n{expected}"); + assert_eq!( + actual, expected, + "actual:\n\n{actual}expected:\n\n{expected}" + ); } } } From 2ef1f9dcdf6b8ff753a9ea7722f58f09d086ebc7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 17 Jan 2024 10:00:29 -0500 Subject: [PATCH 3/3] Do not print header if only empty batches, test for same --- datafusion-cli/src/print_format.rs | 52 ++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 9 deletions(-) diff --git a/datafusion-cli/src/print_format.rs b/datafusion-cli/src/print_format.rs index 142f3aae9c3f..0a8c7b4b3e3a 100644 --- a/datafusion-cli/src/print_format.rs +++ b/datafusion-cli/src/print_format.rs @@ -161,23 +161,29 @@ impl PrintFormat { maxrows: MaxRows, with_header: bool, ) -> Result<()> { + // filter out any empty batches + let batches: Vec<_> = batches + .iter() + .filter(|b| b.num_rows() > 0) + .cloned() + .collect(); if batches.is_empty() { return Ok(()); } match self { Self::Csv | Self::Automatic => { - print_batches_with_sep(writer, batches, b',', with_header) + print_batches_with_sep(writer, &batches, b',', with_header) } - Self::Tsv => print_batches_with_sep(writer, batches, b'\t', with_header), + Self::Tsv => print_batches_with_sep(writer, &batches, b'\t', with_header), Self::Table => { if maxrows == MaxRows::Limited(0) { return Ok(()); } - format_batches_with_maxrows(writer, batches, maxrows) + format_batches_with_maxrows(writer, &batches, maxrows) } - Self::Json => batches_to_json!(ArrayWriter, writer, batches), - Self::NdJson => batches_to_json!(LineDelimitedWriter, writer, batches), + Self::Json => batches_to_json!(ArrayWriter, writer, &batches), + Self::NdJson => batches_to_json!(LineDelimitedWriter, writer, &batches), } } } @@ -354,10 +360,7 @@ mod tests { #[test] fn test_print_batches_empty_batches() -> Result<()> { - let batch = RecordBatch::try_from_iter(vec![( - "a", - Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, - )])?; + let batch = one_column_batch(); let empty_batch = RecordBatch::new_empty(batch.schema()); #[rustfmt::skip] @@ -379,6 +382,22 @@ mod tests { Ok(()) } + #[test] + fn test_print_batches_empty_batches_no_header() -> Result<()> { + let empty_batch = RecordBatch::new_empty(one_column_batch().schema()); + + // empty batches should not print a header + let expected = &[""]; + + PrintBatchesTest::new() + .with_format(PrintFormat::Table) + .with_batches(vec![empty_batch]) + .with_header(true) + .with_expected(expected) + .run(); + Ok(()) + } + struct PrintBatchesTest { format: PrintFormat, batches: Vec, @@ -410,6 +429,12 @@ mod tests { self } + /// set whether to include a header + fn with_header(mut self, with_header: bool) -> Self { + self.with_header = with_header; + self + } + /// set expected output fn with_expected(mut self, expected: &[&'static str]) -> Self { self.expected = expected.to_vec(); @@ -430,4 +455,13 @@ mod tests { ); } } + + /// return a batch with one column and three rows + fn one_column_batch() -> RecordBatch { + RecordBatch::try_from_iter(vec![( + "a", + Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef, + )]) + .unwrap() + } }