Skip to content

Commit

Permalink
Added the same `lit(false)` fallback to RowLevelConstraint that RowLevelAssertedConstraint already uses when no row-level metric column is available
Browse files (browse the repository at this point in the history)
Skipped RowLevelGroupedConstraint because only UniqueValueRatio/Uniqueness use it, and they don't use preconditions.
  • Loading branch information
rdsharma26 committed Dec 17, 2024
1 parent ddc0942 commit 2e7b2d8
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 19 deletions.
2 changes: 1 addition & 1 deletion src/main/scala/com/amazon/deequ/VerificationResult.scala
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ object VerificationResult {
case asserted: RowLevelAssertedConstraint =>
constraintResult.metric.flatMap(metricToColumn).map(asserted.assertion(_)).orElse(Some(lit(false)))
case _: RowLevelConstraint =>
constraintResult.metric.flatMap(metricToColumn)
constraintResult.metric.flatMap(metricToColumn).orElse(Some(lit(false)))
case _: RowLevelGroupedConstraint =>
constraintResult.metric.flatMap(metricToColumn)
case _ => None
Expand Down
63 changes: 45 additions & 18 deletions src/test/scala/com/amazon/deequ/VerificationSuiteTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2005,24 +2005,51 @@ class VerificationSuiteTest extends WordSpec with Matchers with SparkContextSpec
("4", 4, "red"),
("5", 5, "purple")
).toDF("id", "id2", "color")
val invalidColumn = "id"
val validColumn = "id2"
val checkOnInvalidColumnDescription = s"check on $invalidColumn"
val checkOnValidColumnDescription = s"check on $validColumn"
val checkOnInvalidColumn = Check(CheckLevel.Error, checkOnInvalidColumnDescription)
.hasMin(invalidColumn, _ >= 3)
.isComplete(invalidColumn)
val checkOnValidColumn = Check(CheckLevel.Error, checkOnValidColumnDescription)
.hasMin(validColumn, _ >= 3)
.isComplete(validColumn)
val verificationResult =
VerificationSuite().onData(df).addChecks(Seq(checkOnInvalidColumn, checkOnValidColumn)).run()
val rowLevelResults =
VerificationResult.rowLevelResultsAsDataFrame(sparkSession, verificationResult, df).collect()
val invalidColumnCheckRowLevelResults = rowLevelResults.map(_.getAs[Boolean](checkOnInvalidColumnDescription))
val validColumnCheckRowLevelResults = rowLevelResults.map(_.getAs[Boolean](checkOnValidColumnDescription))
invalidColumnCheckRowLevelResults shouldBe Seq(false, false, false, false, false)
validColumnCheckRowLevelResults shouldBe Seq(false, false, true, true, true)

val idColumn = "id"
val id2Column = "id2"

val minCheckOnInvalidColumnDescription = s"min check on $idColumn"
val minCheckOnValidColumnDescription = s"min check on $id2Column"
val patternMatchCheckOnInvalidColumnDescription = s"pattern check on $id2Column"
val patternMatchCheckOnValidColumnDescription = s"pattern check on $idColumn"

val minCheckOnInvalidColumn = Check(CheckLevel.Error, minCheckOnInvalidColumnDescription)
.hasMin(idColumn, _ >= 3)
.isComplete(idColumn)
val minCheckOnValidColumn = Check(CheckLevel.Error, minCheckOnValidColumnDescription)
.hasMin(id2Column, _ >= 3)
.isComplete(id2Column)

val patternMatchCheckOnInvalidColumn = Check(CheckLevel.Error, patternMatchCheckOnInvalidColumnDescription)
.hasPattern(id2Column, "[0-3]+".r)
val patternMatchCheckOnValidColumn = Check(CheckLevel.Error, patternMatchCheckOnValidColumnDescription)
.hasPattern(idColumn, "[0-3]+".r)

val checks = Seq(
minCheckOnInvalidColumn,
minCheckOnValidColumn,
patternMatchCheckOnInvalidColumn,
patternMatchCheckOnValidColumn
)

val verificationResult = VerificationSuite().onData(df).addChecks(checks).run()
val rowLevelResultsDF = VerificationResult.rowLevelResultsAsDataFrame(sparkSession, verificationResult, df)
val rowLevelResults = rowLevelResultsDF.collect()

val minCheckOnInvalidColumnRowLevelResults =
rowLevelResults.map(_.getAs[Boolean](minCheckOnInvalidColumnDescription))
val minCheckOnValidColumnRowLevelResults =
rowLevelResults.map(_.getAs[Boolean](minCheckOnValidColumnDescription))
val patternMatchCheckOnInvalidColumnRowLevelResults =
rowLevelResults.map(_.getAs[Boolean](patternMatchCheckOnInvalidColumnDescription))
val patternMatchCheckOnValidColumnRowLevelResults =
rowLevelResults.map(_.getAs[Boolean](patternMatchCheckOnValidColumnDescription))

minCheckOnInvalidColumnRowLevelResults shouldBe Seq(false, false, false, false, false)
minCheckOnValidColumnRowLevelResults shouldBe Seq(false, false, true, true, true)
patternMatchCheckOnInvalidColumnRowLevelResults shouldBe Seq(false, false, false, false, false)
patternMatchCheckOnValidColumnRowLevelResults shouldBe Seq(true, true, true, false, false)
}

"yield correct results for satisfies check" in withSparkSession { sparkSession =>
Expand Down

0 comments on commit 2e7b2d8

Please sign in to comment.