From 9c6c54d48ea859cc978e03e2f316de80d8a4a8db Mon Sep 17 00:00:00 2001 From: Josh <5685731+marcantony@users.noreply.github.com> Date: Wed, 11 Sep 2024 19:45:26 -0400 Subject: [PATCH] Replace 'withColumns' with 'select' 'withColumns' was introduced in Spark 3.3, so it won't work for Deequ's <3.3 builds. --- src/main/scala/com/amazon/deequ/VerificationResult.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/scala/com/amazon/deequ/VerificationResult.scala b/src/main/scala/com/amazon/deequ/VerificationResult.scala index 418a622e..b9b450f2 100644 --- a/src/main/scala/com/amazon/deequ/VerificationResult.scala +++ b/src/main/scala/com/amazon/deequ/VerificationResult.scala @@ -31,7 +31,7 @@ import com.amazon.deequ.repository.SimpleResultSerde import org.apache.spark.sql.Column import org.apache.spark.sql.DataFrame import org.apache.spark.sql.SparkSession -import org.apache.spark.sql.functions.monotonically_increasing_id +import org.apache.spark.sql.functions.{col, monotonically_increasing_id} import java.util.UUID @@ -96,9 +96,10 @@ object VerificationResult { data: DataFrame): DataFrame = { val columnNamesToMetrics: Map[String, Column] = verificationResultToColumn(verificationResult) + val columnsAliased = columnNamesToMetrics.toSeq.map { case (name, col) => col.as(name) } val dataWithID = data.withColumn(UNIQUENESS_ID, monotonically_increasing_id()) - dataWithID.withColumns(columnNamesToMetrics).drop(UNIQUENESS_ID) + dataWithID.select(col("*") +: columnsAliased: _*).drop(UNIQUENESS_ID) } def checkResultsAsJson(verificationResult: VerificationResult,