Skip to content

Commit

Permalink
Replace 'withColumns' with 'select'
Browse files Browse the repository at this point in the history
'withColumns' was introduced in Spark 3.3, so it won't
work in Deequ's builds for Spark versions earlier than 3.3.
  • Loading branch information
marcantony committed Sep 11, 2024
1 parent 3b1a3ec commit 9c6c54d
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/main/scala/com/amazon/deequ/VerificationResult.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ import com.amazon.deequ.repository.SimpleResultSerde
import org.apache.spark.sql.Column
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.SparkSession
- import org.apache.spark.sql.functions.monotonically_increasing_id
+ import org.apache.spark.sql.functions.{col, monotonically_increasing_id}

import java.util.UUID

Expand Down Expand Up @@ -96,9 +96,10 @@ object VerificationResult {
data: DataFrame): DataFrame = {

val columnNamesToMetrics: Map[String, Column] = verificationResultToColumn(verificationResult)
+ val columnsAliased = columnNamesToMetrics.toSeq.map { case (name, col) => col.as(name) }

val dataWithID = data.withColumn(UNIQUENESS_ID, monotonically_increasing_id())
- dataWithID.withColumns(columnNamesToMetrics).drop(UNIQUENESS_ID)
+ dataWithID.select(col("*") +: columnsAliased: _*).drop(UNIQUENESS_ID)
}

def checkResultsAsJson(verificationResult: VerificationResult,
Expand Down

0 comments on commit 9c6c54d

Please sign in to comment.