From 879b75419bea773122657665e8744797efa9be15 Mon Sep 17 00:00:00 2001
From: Remzi Yang <59198230+HaoYang670@users.noreply.github.com>
Date: Fri, 25 Feb 2022 20:19:05 +0800
Subject: [PATCH] Fix clippy lints (#1885)

* fix some lints

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix some lints

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix some lints

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix some lints

Signed-off-by: remzi <13716567376yh@gmail.com>

* fix all lints

Signed-off-by: remzi <13716567376yh@gmail.com>
---
 ballista/rust/core/src/serde/logical_plan/to_proto.rs |  2 +-
 datafusion/fuzz-utils/src/lib.rs                      |  9 +++------
 datafusion/src/execution/context.rs                   |  6 +++---
 datafusion/src/logical_plan/builder.rs                |  3 +--
 datafusion/src/logical_plan/plan.rs                   |  3 +--
 datafusion/src/optimizer/filter_push_down.rs          |  3 +--
 datafusion/src/physical_plan/hash_aggregate.rs        | 10 +++++-----
 datafusion/src/physical_plan/regex_expressions.rs     |  2 +-
 datafusion/src/physical_plan/union.rs                 |  6 ++----
 datafusion/src/physical_plan/values.rs                |  2 +-
 datafusion/tests/merge_fuzz.rs                        |  3 +--
 datafusion/tests/order_spill_fuzz.rs                  |  3 +--
 12 files changed, 21 insertions(+), 31 deletions(-)

diff --git a/ballista/rust/core/src/serde/logical_plan/to_proto.rs b/ballista/rust/core/src/serde/logical_plan/to_proto.rs
index 16f19b0f4b28..9cb8c4166c93 100644
--- a/ballista/rust/core/src/serde/logical_plan/to_proto.rs
+++ b/ballista/rust/core/src/serde/logical_plan/to_proto.rs
@@ -198,7 +198,7 @@ impl From<&DataType> for protobuf::arrow_type::ArrowTypeEnum {
             DataType::Timestamp(time_unit, timezone) => {
                 ArrowTypeEnum::Timestamp(protobuf::Timestamp {
                     time_unit: protobuf::TimeUnit::from_arrow_time_unit(time_unit) as i32,
-                    timezone: timezone.to_owned().unwrap_or_else(String::new),
+                    timezone: timezone.to_owned().unwrap_or_default(),
                 })
             }
             DataType::Date32 => ArrowTypeEnum::Date32(EmptyMessage {}),
diff --git a/datafusion/fuzz-utils/src/lib.rs b/datafusion/fuzz-utils/src/lib.rs
index e021f55f8724..920a9bc8d2f1 100644
--- a/datafusion/fuzz-utils/src/lib.rs
+++ b/datafusion/fuzz-utils/src/lib.rs
@@ -26,7 +26,7 @@ pub use env_logger;
 pub fn batches_to_vec(batches: &[RecordBatch]) -> Vec<Option<i32>> {
     batches
         .iter()
-        .map(|batch| {
+        .flat_map(|batch| {
             assert_eq!(batch.num_columns(), 1);
             batch
                 .column(0)
@@ -35,7 +35,6 @@ pub fn batches_to_vec(batches: &[RecordBatch]) -> Vec<Option<i32>> {
                 .unwrap()
                 .iter()
         })
-        .flatten()
         .collect()
 }
 
@@ -43,8 +42,7 @@
 pub fn partitions_to_sorted_vec(partitions: &[Vec<RecordBatch>]) -> Vec<Option<i32>> {
     let mut values: Vec<_> = partitions
         .iter()
-        .map(|batches| batches_to_vec(batches).into_iter())
-        .flatten()
+        .flat_map(|batches| batches_to_vec(batches).into_iter())
         .collect();
 
     values.sort_unstable();
@@ -60,7 +58,7 @@ pub fn add_empty_batches(
     batches
         .into_iter()
-        .map(|batch| {
+        .flat_map(|batch| {
             // insert 0, or 1 empty batches before and after the current batch
             let empty_batch = RecordBatch::new_empty(schema.clone());
             std::iter::repeat(empty_batch.clone())
@@ -68,6 +66,5 @@ pub fn add_empty_batches(
                 .take(rng.gen_range(0..2))
                 .chain(std::iter::once(batch))
                 .chain(std::iter::repeat(empty_batch).take(rng.gen_range(0..2)))
         })
-        .flatten()
         .collect()
 }
diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs
index 5a913e91b0f8..3017660f13e7 100644
--- a/datafusion/src/execution/context.rs
+++ b/datafusion/src/execution/context.rs
@@ -1151,7 +1151,7 @@ impl ExecutionProps {
         var_type: VarType,
         provider: Arc<dyn VarProvider + Send + Sync>,
     ) -> Option<Arc<dyn VarProvider + Send + Sync>> {
-        let mut var_providers = self.var_providers.take().unwrap_or_else(HashMap::new);
+        let mut var_providers = self.var_providers.take().unwrap_or_default();
 
         let old_provider = var_providers.insert(var_type, provider);
 
@@ -3262,7 +3262,7 @@ mod tests {
         let logical_plan = ctx.create_logical_plan(sql)?;
         let logical_plan = ctx.optimize(&logical_plan)?;
         let physical_plan = ctx.create_physical_plan(&logical_plan).await?;
-        ctx.write_csv(physical_plan, out_dir.to_string()).await
+        ctx.write_csv(physical_plan, out_dir).await
     }
 
     /// Execute SQL and write results to partitioned parquet files
@@ -3275,7 +3275,7 @@ mod tests {
         let logical_plan = ctx.create_logical_plan(sql)?;
         let logical_plan = ctx.optimize(&logical_plan)?;
         let physical_plan = ctx.create_physical_plan(&logical_plan).await?;
-        ctx.write_parquet(physical_plan, out_dir.to_string(), writer_properties)
+        ctx.write_parquet(physical_plan, out_dir, writer_properties)
             .await
     }
 
diff --git a/datafusion/src/logical_plan/builder.rs b/datafusion/src/logical_plan/builder.rs
index 0144b75166ab..086d331f7042 100644
--- a/datafusion/src/logical_plan/builder.rs
+++ b/datafusion/src/logical_plan/builder.rs
@@ -1041,14 +1041,13 @@ pub(crate) fn expand_wildcard(
     let columns_to_skip = using_columns
         .into_iter()
         // For each USING JOIN condition, only expand to one column in projection
-        .map(|cols| {
+        .flat_map(|cols| {
            let mut cols = cols.into_iter().collect::<Vec<_>>();
            // sort join columns to make sure we consistently keep the same
            // qualified column
            cols.sort();
            cols.into_iter().skip(1)
         })
-        .flatten()
         .collect::<HashSet<_>>();
 
     if columns_to_skip.is_empty() {
diff --git a/datafusion/src/logical_plan/plan.rs b/datafusion/src/logical_plan/plan.rs
index 3d49e5484eab..64907302ee87 100644
--- a/datafusion/src/logical_plan/plan.rs
+++ b/datafusion/src/logical_plan/plan.rs
@@ -543,8 +543,7 @@ impl LogicalPlan {
                 {
                     self.using_columns.push(
                         on.iter()
-                            .map(|entry| [&entry.0, &entry.1])
-                            .flatten()
+                            .flat_map(|entry| [&entry.0, &entry.1])
                             .cloned()
                             .collect::<HashSet<Column>>(),
                     );
diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs
index 78911313efaf..d8e43ed2175b 100644
--- a/datafusion/src/optimizer/filter_push_down.rs
+++ b/datafusion/src/optimizer/filter_push_down.rs
@@ -228,14 +228,13 @@ fn get_pushable_join_predicates<'a>(
     let schema_columns = schema
         .fields()
         .iter()
-        .map(|f| {
+        .flat_map(|f| {
             [
                 f.qualified_column(),
                 // we need to push down filter using unqualified column as well
                 f.unqualified_column(),
             ]
         })
-        .flatten()
         .collect::<HashSet<_>>();
 
     state
diff --git a/datafusion/src/physical_plan/hash_aggregate.rs b/datafusion/src/physical_plan/hash_aggregate.rs
index b727cdd2e970..89877b3edbcc 100644
--- a/datafusion/src/physical_plan/hash_aggregate.rs
+++ b/datafusion/src/physical_plan/hash_aggregate.rs
@@ -1176,12 +1176,12 @@ mod tests {
         _partition: usize,
         _runtime: Arc<RuntimeEnv>,
     ) -> Result<SendableRecordBatchStream> {
-        let stream;
-        if self.yield_first {
-            stream = TestYieldingStream::New;
+        let stream = if self.yield_first {
+            TestYieldingStream::New
         } else {
-            stream = TestYieldingStream::Yielded;
-        }
+            TestYieldingStream::Yielded
+        };
+
         Ok(Box::pin(stream))
     }
 
diff --git a/datafusion/src/physical_plan/regex_expressions.rs b/datafusion/src/physical_plan/regex_expressions.rs
index 487e1f14c124..cf997c032840 100644
--- a/datafusion/src/physical_plan/regex_expressions.rs
+++ b/datafusion/src/physical_plan/regex_expressions.rs
@@ -139,7 +139,7 @@ pub fn regexp_replace<T: StringOffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
diff --git a/datafusion/src/physical_plan/values.rs b/datafusion/src/physical_plan/values.rs
--- a/datafusion/src/physical_plan/values.rs
+++ b/datafusion/src/physical_plan/values.rs
     async fn values_empty_case() -> Result<()> {
         let schema = test_util::aggr_test_schema();
         let empty = ValuesExec::try_new(schema, vec![]);
-        assert!(!empty.is_ok());
+        assert!(empty.is_err());
         Ok(())
     }
 }
diff --git a/datafusion/tests/merge_fuzz.rs b/datafusion/tests/merge_fuzz.rs
index 6821c6ba52d0..d874ec507c49 100644
--- a/datafusion/tests/merge_fuzz.rs
+++ b/datafusion/tests/merge_fuzz.rs
@@ -104,8 +104,7 @@ async fn run_merge_test(input: Vec<Vec<RecordBatch>>) {
     for batch_size in batch_sizes {
         let first_batch = input
             .iter()
-            .map(|p| p.iter())
-            .flatten()
+            .flat_map(|p| p.iter())
             .next()
             .expect("at least one batch");
         let schema = first_batch.schema();
diff --git a/datafusion/tests/order_spill_fuzz.rs b/datafusion/tests/order_spill_fuzz.rs
index 049fe6a4f4fd..b1586f06c02c 100644
--- a/datafusion/tests/order_spill_fuzz.rs
+++ b/datafusion/tests/order_spill_fuzz.rs
@@ -58,8 +58,7 @@ async fn run_sort(pool_size: usize, size_spill: Vec<(usize, bool)>) {
         let input = vec![make_staggered_batches(size)];
         let first_batch = input
             .iter()
-            .map(|p| p.iter())
-            .flatten()
+            .flat_map(|p| p.iter())
             .next()
             .expect("at least one batch");
         let schema = first_batch.schema();
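
Editor's note, not part of the original commit: the hunks above all apply a
handful of recurring clippy rewrites. Below is a minimal, self-contained Rust
sketch of the same patterns; the values and variable names are hypothetical
and are not taken from the DataFusion codebase:

    fn main() {
        // `.map(..).flatten()` -> `.flat_map(..)`: one iterator adapter
        // instead of two (clippy's `map_flatten` lint).
        let nested = vec![vec![1, 2], vec![3]];
        let flat: Vec<i32> = nested.iter().flat_map(|v| v.iter().copied()).collect();
        assert_eq!(flat, vec![1, 2, 3]);

        // `.unwrap_or_else(String::new)` -> `.unwrap_or_default()`: the
        // closure only builds the type's default value, so ask for the
        // default directly.
        let timezone: Option<String> = None;
        assert_eq!(timezone.unwrap_or_default(), "");

        // `assert!(!x.is_ok())` -> `assert!(x.is_err())`: states the intent
        // positively instead of negating the opposite test.
        let empty: Result<(), &str> = Err("no data");
        assert!(empty.is_err());

        // `let x; if cond { x = a; } else { x = b; }` -> a single binding
        // from an `if` expression, as in the hash_aggregate.rs hunk.
        let yield_first = true;
        let stream = if yield_first { "new" } else { "yielded" };
        assert_eq!(stream, "new");
    }

Each rewrite is behavior-preserving; the payoff is intent that is visible at a
glance and fewer intermediate adapters for the compiler to see through.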