
Commit 2e5c792
Replace AsIsExchangeExec with AsIsShuffleExchangeExec
EnricoMi committed Oct 24, 2023
1 parent f53ee17 commit 2e5c792
Showing 6 changed files with 17 additions and 12 deletions.
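
In short, this commit renames the pass-through exchange node AsIsExchangeExec to AsIsShuffleExchangeExec, duplicates the range-shuffle wrapping into a standalone ReuseSampledStage rule (EnsureRequirements keeps its own copy of the case, renamed), and registers that rule in both the regular and the adaptive preparation pipelines, directly after EnsureRequirements. The renamed node's definition is not part of this diff; judging from the AsIsExchangeExec body removed in the Exchange hunk below, it is presumably the same pass-through Exchange under the new name. A minimal sketch under that assumption:

// Sketch only — assumes AsIsShuffleExchangeExec mirrors the removed
// AsIsExchangeExec: a pass-through Exchange that forwards ordering,
// partitioning, and execution to its child unchanged.
case class AsIsShuffleExchangeExec(override val child: SparkPlan) extends Exchange {
  override def outputOrdering: Seq[SortOrder] = child.outputOrdering
  override def outputPartitioning: Partitioning = child.outputPartitioning
  override protected def doExecute(): RDD[InternalRow] = child.doExecute()
  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
    copy(child = newChild)
}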
@@ -19,9 +19,9 @@ package org.apache.spark.sql.catalyst.plans.physical

import scala.annotation.tailrec
import scala.collection.mutable

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.plans.logical.LeafNode
import org.apache.spark.sql.catalyst.util.InternalRowComparableWrapper
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types.{DataType, IntegerType}
@@ -38,7 +38,7 @@ import org.apache.spark.sql.catalyst.util.truncatedString
 import org.apache.spark.sql.execution.adaptive.{AdaptiveExecutionContext, InsertAdaptiveSparkPlan}
 import org.apache.spark.sql.execution.bucketing.{CoalesceBucketsInJoin, DisableUnnecessaryBucketedScan}
 import org.apache.spark.sql.execution.dynamicpruning.PlanDynamicPruningFilters
-import org.apache.spark.sql.execution.exchange.EnsureRequirements
+import org.apache.spark.sql.execution.exchange.{EnsureRequirements, ReuseSampledStage}
 import org.apache.spark.sql.execution.reuse.ReuseExchangeAndSubquery
 import org.apache.spark.sql.execution.streaming.{IncrementalExecution, OffsetSeqMetadata, WatermarkPropagator}
 import org.apache.spark.sql.internal.SQLConf
@@ -456,6 +456,7 @@ object QueryExecution
       PlanSubqueries(sparkSession),
       RemoveRedundantProjects,
       EnsureRequirements(),
+      ReuseSampledStage,
       // `ReplaceHashWithSortAgg` needs to be added after `EnsureRequirements` to guarantee the
       // sort order of each node is checked to be valid.
       ReplaceHashWithSortAgg,
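
ReuseSampledStage is placed immediately after EnsureRequirements(), so it only sees plans whose required exchanges are already inserted. For orientation, a sketch (not the exact Spark code) of how such a Seq[Rule[SparkPlan]] is applied — each rule transforms the output of the previous one, so position in the list fixes execution order:

// Sketch, assuming `preparations` and `initialPlan` are in scope:
val prepared: SparkPlan =
  preparations.foldLeft(initialPlan) { (plan, rule) => rule.apply(plan) }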
@@ -123,6 +123,7 @@ case class AdaptiveSparkPlanExec(
       CoalesceBucketsInJoin,
       RemoveRedundantProjects,
       ensureRequirements,
+      ReuseSampledStage,
       AdjustShuffleExchangePosition,
       ValidateSparkPlan,
       ReplaceHashWithSortAgg,
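
The same rule is also registered in AdaptiveSparkPlanExec's stage-preparation rules, again right after the ensureRequirements step, so range shuffles get wrapped whether or not adaptive query execution is enabled. Which of the two pipelines runs is controlled by the standard AQE switch:

// Standard Spark flag (not introduced by this commit):
// true  -> AdaptiveSparkPlanExec's preparation rules apply ReuseSampledStage
// false -> QueryExecution.preparations applies it
spark.conf.set("spark.sql.adaptive.enabled", "true")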
@@ -588,8 +588,8 @@ case class EnsureRequirements(
   def apply(plan: SparkPlan): SparkPlan = {
     val newPlan = plan.transformUp {
       case operator @ ShuffleExchangeExec(_: RangePartitioning, child, _, _)
-          if ! child.isInstanceOf[AsIsExchangeExec] =>
-        operator.withNewChildren(Seq(AsIsExchangeExec(child)))
+          if ! child.isInstanceOf[AsIsShuffleExchangeExec] =>
+        operator.withNewChildren(Seq(AsIsShuffleExchangeExec(child)))
       case operator @ ShuffleExchangeExec(upper: HashPartitioning, child, shuffleOrigin, _)
           if optimizeOutRepartition &&
             (shuffleOrigin == REPARTITION_BY_COL || shuffleOrigin == REPARTITION_BY_NUM) =>
@@ -637,3 +637,11 @@
     }
   }
 }
+
+object ReuseSampledStage extends Rule[SparkPlan] {
+  override def apply(plan: SparkPlan): SparkPlan = plan.transformUp {
+    case operator @ ShuffleExchangeExec(_: RangePartitioning, child, _, _)
+        if !child.isInstanceOf[AsIsShuffleExchangeExec] =>
+      operator.withNewChildren(Seq(AsIsShuffleExchangeExec(child)))
+  }
+}
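
The guard !child.isInstanceOf[AsIsShuffleExchangeExec] makes the rewrite idempotent: preparation rules may run over the same plan more than once (notably when AQE re-optimizes), and without the check each pass would nest another wrapper around the shuffle's child. A sketch of the intended invariant, assuming somePlan contains a range-partitioned shuffle:

// Illustrative only: applying the rule twice must equal applying it once.
// Pass 1: ShuffleExchangeExec(range, child)
//      -> ShuffleExchangeExec(range, AsIsShuffleExchangeExec(child))
// Pass 2: the guard sees the wrapper and leaves the plan untouched.
val once = ReuseSampledStage(somePlan)
val twice = ReuseSampledStage(once)
assert(once == twice) // plan nodes are case classes, so equality is structural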
@@ -91,11 +91,3 @@ case class ReusedExchangeExec(override val output: Seq[Attribute], child: Exchange)
       |""".stripMargin
   }
 }
-
-case class AsIsExchangeExec(override val child: SparkPlan) extends Exchange {
-  override def outputOrdering: Seq[SortOrder] = child.outputOrdering
-  override def outputPartitioning: Partitioning = child.outputPartitioning
-  override protected def doExecute(): RDD[InternalRow] = child.doExecute()
-  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
-    copy(child = newChild)
-}
@@ -406,6 +406,9 @@ object ShuffleExchangeExec
             position += 1
             position
           }
+        case a: AsIsPartitioning =>
+          val projection = UnsafeProjection.create(a.partitionIdExpression :: Nil, outputAttributes)
+          row => projection(row).getInt(0)
         case h: HashPartitioning =>
           val projection = UnsafeProjection.create(h.partitionIdExpression :: Nil, outputAttributes)
           row => projection(row).getInt(0)
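
AsIsPartitioning itself is not defined in this diff (it presumably lives in the catalyst partitioning file touched by the first hunk). The new case mirrors the HashPartitioning branch directly below it: an UnsafeProjection evaluates partitionIdExpression per row and the resulting integer is used as the partition id verbatim, with no hashing. A hypothetical shape, for illustration only:

// Hypothetical — the real definition is not shown in this commit. By analogy
// with HashPartitioning.partitionIdExpression, an "as is" partitioning would
// hand back a precomputed partition id rather than hashing its keys:
case class AsIsPartitioning(partIdExpr: Expression, numPartitions: Int)
    extends Partitioning {
  // Evaluated per row by the extractor added in ShuffleExchangeExec above.
  def partitionIdExpression: Expression = partIdExpr
}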
