From f0922c36d55832f7b540f991e4b14e8cf18901c3 Mon Sep 17 00:00:00 2001 From: Tuan Pham Date: Sun, 15 Sep 2024 14:47:04 +1000 Subject: [PATCH 1/2] Make addAnomalyCheck not requiring State type --- pom.xml | 2 +- .../amazon/deequ/VerificationRunBuilder.scala | 8 +-- .../applicability/Applicability.scala | 2 +- .../scala/com/amazon/deequ/checks/Check.scala | 12 ++-- .../constraints/AnalysisBasedConstraint.scala | 4 +- .../amazon/deequ/constraints/Constraint.scala | 56 +++++++++---------- .../AnalysisBasedConstraintTest.scala | 28 +++++----- .../deequ/constraints/ConstraintUtils.scala | 2 +- .../deequ/constraints/ConstraintsTest.scala | 8 +-- ...ConstraintSuggestionsIntegrationTest.scala | 10 ++-- 10 files changed, 66 insertions(+), 66 deletions(-) diff --git a/pom.xml b/pom.xml index 0652f5008..18098249e 100644 --- a/pom.xml +++ b/pom.xml @@ -233,7 +233,7 @@ 1.0.0 false - true + false true false ${project.basedir}/src/main/scala diff --git a/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala b/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala index 150e164fd..d665663c0 100644 --- a/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala +++ b/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala @@ -223,9 +223,9 @@ class VerificationRunBuilderWithRepository( * @param analyzer The analyzer for the metric to run anomaly detection on * @param anomalyCheckConfig Some configuration settings for the Check */ - def addAnomalyCheck[S <: State[S]]( + def addAnomalyCheck( anomalyDetectionStrategy: AnomalyDetectionStrategy, - analyzer: Analyzer[S, Metric[Double]], + analyzer: Analyzer[_ <: State[_], Metric[Double]], anomalyCheckConfig: Option[AnomalyCheckConfig] = None) : this.type = { @@ -298,10 +298,10 @@ private[this] object VerificationRunBuilderHelper { * @param analyzer The analyzer for the metric to run anomaly detection on * @param anomalyCheckConfig Some configuration settings for the Check */ - def getAnomalyCheck[S <: State[S]]( + def getAnomalyCheck( metricsRepository: MetricsRepository, anomalyDetectionStrategy: AnomalyDetectionStrategy, - analyzer: Analyzer[S, Metric[Double]], + analyzer: Analyzer[_ <: State[_], Metric[Double]], anomalyCheckConfig: AnomalyCheckConfig) : Check = { diff --git a/src/main/scala/com/amazon/deequ/analyzers/applicability/Applicability.scala b/src/main/scala/com/amazon/deequ/analyzers/applicability/Applicability.scala index e2c282c14..e03ac6030 100644 --- a/src/main/scala/com/amazon/deequ/analyzers/applicability/Applicability.scala +++ b/src/main/scala/com/amazon/deequ/analyzers/applicability/Applicability.scala @@ -187,7 +187,7 @@ private[deequ] class Applicability(session: SparkSession) { case (name, nc: ConstraintDecorator) => name -> nc.inner case (name, c: Constraint) => name -> c } - .collect { case (name, constraint: AnalysisBasedConstraint[_, _, _]) => + .collect { case (name, constraint: AnalysisBasedConstraint[_, _]) => val metric = constraint.analyzer.calculate(data).value name -> metric } diff --git a/src/main/scala/com/amazon/deequ/checks/Check.scala b/src/main/scala/com/amazon/deequ/checks/Check.scala index 1e1048921..42f2ef986 100644 --- a/src/main/scala/com/amazon/deequ/checks/Check.scala +++ b/src/main/scala/com/amazon/deequ/checks/Check.scala @@ -470,7 +470,7 @@ case class Check( matchColumnMappings: Option[Map[String, String]] = None, hint: Option[String] = None): Check = { val dataMatchAnalyzer = DatasetMatchAnalyzer(otherDataset, keyColumnMappings, assertion, matchColumnMappings) - val constraint = AnalysisBasedConstraint[DatasetMatchState, Double, Double](dataMatchAnalyzer, assertion, + val constraint = AnalysisBasedConstraint[Double, Double](dataMatchAnalyzer, assertion, hint = hint) addConstraint(constraint) } @@ -563,10 +563,10 @@ case class Check( * could have failed * @return */ - private[deequ] def isNewestPointNonAnomalous[S <: State[S]]( + private[deequ] def isNewestPointNonAnomalous( metricsRepository: MetricsRepository, anomalyDetectionStrategy: AnomalyDetectionStrategy, - analyzer: Analyzer[S, Metric[Double]], + analyzer: Analyzer[_ <: State[_], Metric[Double]], withTagValues: Map[String, String], afterDate: Option[Long], beforeDate: Option[Long], @@ -1262,7 +1262,7 @@ case class Check( case c: Constraint => c } .collect { - case constraint: AnalysisBasedConstraint[_, _, _] => constraint.analyzer + case constraint: AnalysisBasedConstraint[_, _] => constraint.analyzer } .map { _.asInstanceOf[Analyzer[_, Metric[_]]] } .toSet @@ -1297,10 +1297,10 @@ object Check { * @param currentMetricValue current metric value * @return */ - private[deequ] def isNewestPointNonAnomalous[S <: State[S]]( + private[deequ] def isNewestPointNonAnomalous( metricsRepository: MetricsRepository, anomalyDetectionStrategy: AnomalyDetectionStrategy, - analyzer: Analyzer[S, Metric[Double]], + analyzer: Analyzer[_ <: State[_], Metric[Double]], withTagValues: Map[String, String], afterDate: Option[Long], beforeDate: Option[Long])( diff --git a/src/main/scala/com/amazon/deequ/constraints/AnalysisBasedConstraint.scala b/src/main/scala/com/amazon/deequ/constraints/AnalysisBasedConstraint.scala index e1a530c97..bbe649f75 100644 --- a/src/main/scala/com/amazon/deequ/constraints/AnalysisBasedConstraint.scala +++ b/src/main/scala/com/amazon/deequ/constraints/AnalysisBasedConstraint.scala @@ -39,8 +39,8 @@ import scala.util.{Failure, Success, Try} * @tparam V : Type of the value being used in assertion function * */ -private[deequ] case class AnalysisBasedConstraint[S <: State[S], M, V]( - analyzer: Analyzer[S, Metric[M]], +private[deequ] case class AnalysisBasedConstraint[M, V]( + analyzer: Analyzer[_ <: State[_], Metric[M]], private[deequ] val assertion: V => Boolean, private[deequ] val valuePicker: Option[M => V] = None, private[deequ] val hint: Option[String] = None) diff --git a/src/main/scala/com/amazon/deequ/constraints/Constraint.scala b/src/main/scala/com/amazon/deequ/constraints/Constraint.scala index e289b3859..4180cf504 100644 --- a/src/main/scala/com/amazon/deequ/constraints/Constraint.scala +++ b/src/main/scala/com/amazon/deequ/constraints/Constraint.scala @@ -122,7 +122,7 @@ object Constraint { } def fromAnalyzer(size: Size, assertion: Long => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[NumMatches, Double, Long](size, + val constraint = AnalysisBasedConstraint[Double, Long](size, assertion, Some(_.toLong), hint) new NamedConstraint(constraint, s"SizeConstraint($size)") @@ -135,7 +135,7 @@ object Constraint { def fromAnalyzer(colCount: ColumnCount, assertion: Long => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[NumMatches, Double, Long](colCount, assertion, Some(_.toLong), hint) + val constraint = AnalysisBasedConstraint[Double, Long](colCount, assertion, Some(_.toLong), hint) new NamedConstraint(constraint, name = s"ColumnCountConstraint($colCount)") } @@ -162,7 +162,7 @@ object Constraint { val histogram = Histogram(column, binningUdf, maxBins, where) - val constraint = AnalysisBasedConstraint[FrequenciesAndNumRows, Distribution, Distribution]( + val constraint = AnalysisBasedConstraint[Distribution, Distribution]( histogram, assertion, hint = hint) new NamedConstraint(constraint, s"HistogramConstraint($histogram)") @@ -191,7 +191,7 @@ object Constraint { val histogram = Histogram(column, binningUdf, maxBins, where, computeFrequenciesAsRatio) - val constraint = AnalysisBasedConstraint[FrequenciesAndNumRows, Distribution, Long]( + val constraint = AnalysisBasedConstraint[Distribution, Long]( histogram, assertion, Some(_.numberOfBins), hint) new NamedConstraint(constraint, s"HistogramBinConstraint($histogram)") @@ -222,7 +222,7 @@ object Constraint { def fromAnalyzer(completeness: Completeness, assertion: Double => Boolean, hint: Option[String] = None): Constraint = { - val constraint = AnalysisBasedConstraint[NumMatchesAndCount, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( completeness, assertion, hint = hint) new RowLevelConstraint(constraint, s"CompletenessConstraint($completeness)", s"Completeness-${completeness.column}") @@ -236,13 +236,13 @@ object Constraint { * (since the metric is double metric) and returns a boolean * @param hint A hint to provide additional context why a constraint could have failed */ - def anomalyConstraint[S <: State[S]]( - analyzer: Analyzer[S, Metric[Double]], + def anomalyConstraint( + analyzer: Analyzer[_ <: State[_], Metric[Double]], anomalyAssertion: Double => Boolean, hint: Option[String] = None) : Constraint = { - val constraint = AnalysisBasedConstraint[S, Double, Double](analyzer, anomalyAssertion, + val constraint = AnalysisBasedConstraint[Double, Double](analyzer, anomalyAssertion, hint = hint) new NamedConstraint(constraint, s"AnomalyConstraint($analyzer)") @@ -272,7 +272,7 @@ object Constraint { } def fromAnalyzer(uniqueness: Uniqueness, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[FrequenciesAndNumRows, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( uniqueness, assertion, hint = hint) new RowLevelGroupedConstraint(constraint, @@ -302,7 +302,7 @@ object Constraint { } def fromAnalyzer(distinctness: Distinctness, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[FrequenciesAndNumRows, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( distinctness, assertion, hint = hint) new NamedConstraint(constraint, s"DistinctnessConstraint($distinctness)") @@ -335,7 +335,7 @@ object Constraint { assertion: Double => Boolean, hint: Option[String]) : Constraint = { - val constraint = AnalysisBasedConstraint[FrequenciesAndNumRows, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( uniqueValueRatio, assertion, hint = hint) new RowLevelGroupedConstraint(constraint, @@ -368,7 +368,7 @@ object Constraint { } private def fromAnalyzer(compliance: Compliance, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[NumMatchesAndCount, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( compliance, assertion, hint = hint) new RowLevelConstraint( @@ -409,7 +409,7 @@ object Constraint { name: Option[String], hint: Option[String]): Constraint = { val column: String = patternMatch.column - val constraint = AnalysisBasedConstraint[NumMatchesAndCount, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( patternMatch, assertion, hint = hint) val constraintName = name match { @@ -442,7 +442,7 @@ object Constraint { } def fromAnalyzer(entropy: Entropy, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[FrequenciesAndNumRows, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( entropy, assertion, hint = hint) new NamedConstraint(constraint, s"EntropyConstraint($entropy)") @@ -476,7 +476,7 @@ object Constraint { assertion: Double => Boolean, hint: Option[String]) : Constraint = { - val constraint = AnalysisBasedConstraint[FrequenciesAndNumRows, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( mutualInformation, assertion, hint = hint) new NamedConstraint(constraint, s"MutualInformationConstraint($mutualInformation)") @@ -506,7 +506,7 @@ object Constraint { } def fromAnalyzer(approxQuantile: ApproxQuantile, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[ApproxQuantileState, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( approxQuantile, assertion, hint = hint) new NamedConstraint(constraint, s"ApproxQuantileConstraint($approxQuantile)") @@ -536,7 +536,7 @@ object Constraint { } def fromAnalyzer(exactQuantile: ExactQuantile, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[ExactQuantileState, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( exactQuantile, assertion, hint = hint) new NamedConstraint(constraint, s"ExactQuantileConstraint($exactQuantile)") @@ -565,7 +565,7 @@ object Constraint { def fromAnalyzer(maxLength: MaxLength, assertion: Double => Boolean, hint: Option[String]): Constraint = { val column: String = maxLength.column - val constraint = AnalysisBasedConstraint[MaxState, Double, Double](maxLength, assertion, + val constraint = AnalysisBasedConstraint[Double, Double](maxLength, assertion, hint = hint) val updatedAssertion = getUpdatedRowLevelAssertionForLengthConstraint(assertion, maxLength.analyzerOptions) @@ -601,7 +601,7 @@ object Constraint { def fromAnalyzer(minLength: MinLength, assertion: Double => Boolean, hint: Option[String]): Constraint = { val column: String = minLength.column - val constraint = AnalysisBasedConstraint[MinState, Double, Double](minLength, assertion, + val constraint = AnalysisBasedConstraint[Double, Double](minLength, assertion, hint = hint) val updatedAssertion = getUpdatedRowLevelAssertionForLengthConstraint(assertion, minLength.analyzerOptions) @@ -638,7 +638,7 @@ object Constraint { def fromAnalyzer(minimum: Minimum, assertion: Double => Boolean, hint: Option[String]): Constraint = { val column: String = minimum.column - val constraint = AnalysisBasedConstraint[MinState, Double, Double](minimum, assertion, + val constraint = AnalysisBasedConstraint[Double, Double](minimum, assertion, hint = hint) val updatedAssertion = getUpdatedRowLevelAssertion(assertion, minimum.analyzerOptions) @@ -674,7 +674,7 @@ object Constraint { def fromAnalyzer(maximum: Maximum, assertion: Double => Boolean, hint: Option[String]): Constraint = { val column: String = maximum.column - val constraint = AnalysisBasedConstraint[MaxState, Double, Double](maximum, assertion, + val constraint = AnalysisBasedConstraint[Double, Double](maximum, assertion, hint = hint) val updatedAssertion = getUpdatedRowLevelAssertion(assertion, maximum.analyzerOptions) @@ -707,7 +707,7 @@ object Constraint { } def fromAnalyzer(mean: Mean, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[MeanState, Double, Double](mean, assertion, + val constraint = AnalysisBasedConstraint[Double, Double](mean, assertion, hint = hint) new NamedConstraint(constraint, s"MeanConstraint($mean)") @@ -733,7 +733,7 @@ object Constraint { } def fromAnalyzer(sum: Sum, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[SumState, Double, Double](sum, assertion, + val constraint = AnalysisBasedConstraint[Double, Double](sum, assertion, hint = hint) new NamedConstraint(constraint, s"SumConstraint($sum)") @@ -763,7 +763,7 @@ object Constraint { assertion: Double => Boolean, hint: Option[String]) : Constraint = { - val constraint = AnalysisBasedConstraint[StandardDeviationState, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( standardDeviation, assertion, hint = hint) new NamedConstraint(constraint, s"StandardDeviationConstraint($standardDeviation)") @@ -793,7 +793,7 @@ object Constraint { assertion: Double => Boolean, hint: Option[String]) : Constraint = { - val constraint = AnalysisBasedConstraint[ApproxCountDistinctState, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( approxCountDistinct, assertion, hint = hint) new NamedConstraint(constraint, s"ApproxCountDistinctConstraint($approxCountDistinct)") @@ -821,7 +821,7 @@ object Constraint { } def fromAnalyzer(correlation: Correlation, assertion: Double => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[CorrelationState, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( correlation, assertion, hint = hint) new NamedConstraint(constraint, s"CorrelationConstraint($correlation)") @@ -859,7 +859,7 @@ object Constraint { } } - AnalysisBasedConstraint[DataTypeHistogram, Distribution, Double](DataType(column, where), + AnalysisBasedConstraint[Distribution, Double](DataType(column, where), assertion, Some(valuePicker), hint) } @@ -884,7 +884,7 @@ object Constraint { } def fromAnalyzer(kllSketch: KLLSketch, assertion: BucketDistribution => Boolean, hint: Option[String]): Constraint = { - val constraint = AnalysisBasedConstraint[KLLState, BucketDistribution, BucketDistribution]( + val constraint = AnalysisBasedConstraint[BucketDistribution, BucketDistribution]( kllSketch, assertion, hint = hint) new NamedConstraint(constraint, s"kllSketchConstraint($kllSketch)") diff --git a/src/test/scala/com/amazon/deequ/constraints/AnalysisBasedConstraintTest.scala b/src/test/scala/com/amazon/deequ/constraints/AnalysisBasedConstraintTest.scala index a7efbe180..5e30cf628 100644 --- a/src/test/scala/com/amazon/deequ/constraints/AnalysisBasedConstraintTest.scala +++ b/src/test/scala/com/amazon/deequ/constraints/AnalysisBasedConstraintTest.scala @@ -102,7 +102,7 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte val df = getDfMissing(sparkSession) // Analysis result should equal to 1.0 for an existing column - val resultA = calculate(AnalysisBasedConstraint[NumMatches, Double, Double]( + val resultA = calculate(AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("att1"), _ == 1.0), df) assert(resultA.status == ConstraintStatus.Success) @@ -110,7 +110,7 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte assert(resultA.metric.isDefined) // Analysis result should equal to 1.0 for an existing column - val resultB = calculate(AnalysisBasedConstraint[NumMatches, Double, Double]( + val resultB = calculate(AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("att1"), _ != 1.0), df) assert(resultB.status == ConstraintStatus.Failure) @@ -118,7 +118,7 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte assert(resultB.metric.isDefined) // Analysis should fail for a non existing column - val resultC = calculate(AnalysisBasedConstraint[NumMatches, Double, Double]( + val resultC = calculate(AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("someMissingColumn"), _ == 1.0), df) assert(resultC.status == ConstraintStatus.Failure) @@ -133,16 +133,16 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte val df = getDfMissing(sparkSession) // Analysis result should equal to 100.0 for an existing column - assert(calculate(AnalysisBasedConstraint[NumMatches, Double, Double]( + assert(calculate(AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("att1"), _ == 2.0, Some(valueDoubler)), df).status == ConstraintStatus.Success) - assert(calculate(AnalysisBasedConstraint[NumMatches, Double, Double]( + assert(calculate(AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("att1"), _ != 2.0, Some(valueDoubler)), df).status == ConstraintStatus.Failure) // Analysis should fail for a non existing column - assert(calculate(AnalysisBasedConstraint[NumMatches, Double, Double]( + assert(calculate(AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("someMissingColumn"), _ == 2.0, Some(valueDoubler)), df).status == ConstraintStatus.Failure) } @@ -157,17 +157,17 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte ) // Analysis result should equal to 1.0 for an existing column - assert(AnalysisBasedConstraint[NumMatches, Double, Double](SampleAnalyzer("att1"), _ == 1.0) + assert(AnalysisBasedConstraint[Double, Double](SampleAnalyzer("att1"), _ == 1.0) .evaluate(validResults).status == ConstraintStatus.Success) - assert(AnalysisBasedConstraint[NumMatches, Double, Double](SampleAnalyzer("att1"), _ != 1.0) + assert(AnalysisBasedConstraint[Double, Double](SampleAnalyzer("att1"), _ != 1.0) .evaluate(validResults).status == ConstraintStatus.Failure) - assert(AnalysisBasedConstraint[NumMatches, Double, Double]( + assert(AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("someMissingColumn"), _ != 1.0) .evaluate(validResults).status == ConstraintStatus.Failure) // Although assertion would pass, since analysis result is missing, // constraint fails with missing analysis message - AnalysisBasedConstraint[NumMatches, Double, Double](SampleAnalyzer("att1"), _ == 1.0) + AnalysisBasedConstraint[Double, Double](SampleAnalyzer("att1"), _ == 1.0) .evaluate(emptyResults) match { case result => assert(result.status == ConstraintStatus.Failure) @@ -182,7 +182,7 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte val validResults = Map[Analyzer[_, Metric[_]], Metric[_]]( SampleAnalyzer("att1") -> SampleAnalyzer("att1").calculate(df)) - assert(AnalysisBasedConstraint[NumMatches, Double, Double]( + assert(AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("att1"), _ == 2.0, Some(valueDoubler)) .evaluate(validResults).status == ConstraintStatus.Success) } @@ -199,7 +199,7 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte val validResults = Map[Analyzer[_, Metric[_]], Metric[_]]( SampleAnalyzer("att1") -> SampleAnalyzer("att1").calculate(df)) - val constraint = AnalysisBasedConstraint[NumMatches, Double, Double]( + val constraint = AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("att1"), _ == 1.0, Some(problematicValuePicker)) calculate(constraint, df) match { @@ -231,7 +231,7 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte val df = getDfMissing(sparkSession) - val failingConstraint = AnalysisBasedConstraint[NumMatches, Double, Double]( + val failingConstraint = AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("att1"), _ == 0.9, hint = Some("Value should be like ...!")) calculate(failingConstraint, df) match { @@ -252,7 +252,7 @@ class AnalysisBasedConstraintTest extends WordSpec with Matchers with SparkConte def failingAssertion(value: Double): Boolean = throw exception val constraintResult = calculate( - AnalysisBasedConstraint[NumMatches, Double, Double]( + AnalysisBasedConstraint[Double, Double]( SampleAnalyzer("att1"), failingAssertion), df ) diff --git a/src/test/scala/com/amazon/deequ/constraints/ConstraintUtils.scala b/src/test/scala/com/amazon/deequ/constraints/ConstraintUtils.scala index 5782bc18c..eaf56d69f 100644 --- a/src/test/scala/com/amazon/deequ/constraints/ConstraintUtils.scala +++ b/src/test/scala/com/amazon/deequ/constraints/ConstraintUtils.scala @@ -27,6 +27,6 @@ object ConstraintUtils { case c: Constraint => c } - analysisBasedConstraint.asInstanceOf[AnalysisBasedConstraint[_, _, _]].calculateAndEvaluate(df) + analysisBasedConstraint.asInstanceOf[AnalysisBasedConstraint[_, _]].calculateAndEvaluate(df) } } diff --git a/src/test/scala/com/amazon/deequ/constraints/ConstraintsTest.scala b/src/test/scala/com/amazon/deequ/constraints/ConstraintsTest.scala index e4a8ba898..8dfcb7b4f 100644 --- a/src/test/scala/com/amazon/deequ/constraints/ConstraintsTest.scala +++ b/src/test/scala/com/amazon/deequ/constraints/ConstraintsTest.scala @@ -163,14 +163,14 @@ class ConstraintsTest extends WordSpec with Matchers with SparkContextSpec with "Anomaly constraint" should { "assert on anomaly analyzer values" in withSparkSession { sparkSession => val df = getDfMissing(sparkSession) - assert(calculate(Constraint.anomalyConstraint[NumMatchesAndCount]( + assert(calculate(Constraint.anomalyConstraint( Completeness("att1"), _ > 0.4), df).status == ConstraintStatus.Success) - assert(calculate(Constraint.anomalyConstraint[NumMatchesAndCount]( + assert(calculate(Constraint.anomalyConstraint( Completeness("att1"), _ < 0.4), df).status == ConstraintStatus.Failure) - assert(calculate(Constraint.anomalyConstraint[NumMatchesAndCount]( + assert(calculate(Constraint.anomalyConstraint( Completeness("att2"), _ > 0.7), df).status == ConstraintStatus.Success) - assert(calculate(Constraint.anomalyConstraint[NumMatchesAndCount]( + assert(calculate(Constraint.anomalyConstraint( Completeness("att2"), _ < 0.7), df).status == ConstraintStatus.Failure) } } diff --git a/src/test/scala/com/amazon/deequ/suggestions/ConstraintSuggestionsIntegrationTest.scala b/src/test/scala/com/amazon/deequ/suggestions/ConstraintSuggestionsIntegrationTest.scala index d90b16ef7..3a7fb62a8 100644 --- a/src/test/scala/com/amazon/deequ/suggestions/ConstraintSuggestionsIntegrationTest.scala +++ b/src/test/scala/com/amazon/deequ/suggestions/ConstraintSuggestionsIntegrationTest.scala @@ -239,7 +239,7 @@ class ConstraintSuggestionsIntegrationTest extends WordSpec with SparkContextSpe } private[this] def assertConstraintExistsIn(constraintSuggestionResult: ConstraintSuggestionResult) - (func: (Analyzer[State[_], Metric[_]], Double => Boolean) => Boolean) + (func: (Analyzer[_ <: State[_], Metric[_]], Double => Boolean) => Boolean) : Unit = { assert(evaluate(constraintSuggestionResult, func)) @@ -247,7 +247,7 @@ class ConstraintSuggestionsIntegrationTest extends WordSpec with SparkContextSpe private[this] def assertNoConstraintExistsIn( constraintSuggestionResult: ConstraintSuggestionResult)( - func: (Analyzer[State[_], Metric[_]], Double => Boolean) => Boolean) + func: (Analyzer[_ <: State[_], Metric[_]], Double => Boolean) => Boolean) : Unit = { assert(!evaluate(constraintSuggestionResult, func)) @@ -255,7 +255,7 @@ class ConstraintSuggestionsIntegrationTest extends WordSpec with SparkContextSpe private[this] def evaluate( constraintSuggestionResult: ConstraintSuggestionResult, - func: (Analyzer[State[_], Metric[_]], Double => Boolean) => Boolean) + func: (Analyzer[_ <: State[_], Metric[_]], Double => Boolean) => Boolean) : Boolean = { constraintSuggestionResult @@ -266,9 +266,9 @@ class ConstraintSuggestionsIntegrationTest extends WordSpec with SparkContextSpe case constraint: Constraint => constraint } .exists { constraint => - val analysisBasedConstraint = constraint.asInstanceOf[AnalysisBasedConstraint[_, _, _]] + val analysisBasedConstraint = constraint.asInstanceOf[AnalysisBasedConstraint[_, _]] val assertionFunction = analysisBasedConstraint.assertion.asInstanceOf[Double => Boolean] - val analyzer = analysisBasedConstraint.analyzer.asInstanceOf[Analyzer[State[_], Metric[_]]] + val analyzer = analysisBasedConstraint.analyzer func(analyzer, assertionFunction) } From 203f2fc8ea098c14bc9f2b7125408b7bd82884b2 Mon Sep 17 00:00:00 2001 From: Tuan Pham Date: Sun, 15 Sep 2024 15:50:10 +1000 Subject: [PATCH 2/2] Restore original API --- .../amazon/deequ/VerificationRunBuilder.scala | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala b/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala index d665663c0..3c9390998 100644 --- a/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala +++ b/src/main/scala/com/amazon/deequ/VerificationRunBuilder.scala @@ -215,6 +215,22 @@ class VerificationRunBuilderWithRepository( this } + /** + * Add a check using Anomaly Detection methods. The Anomaly Detection Strategy only checks + * if the new value is an Anomaly. + * + * @param anomalyDetectionStrategy The anomaly detection strategy + * @param analyzer The analyzer for the metric to run anomaly detection on + * @param anomalyCheckConfig Some configuration settings for the Check + */ + def addAnomalyCheck[S <: State[S]]( + anomalyDetectionStrategy: AnomalyDetectionStrategy, + analyzer: Analyzer[S, Metric[Double]], + anomalyCheckConfig: Option[AnomalyCheckConfig] = None) + : this.type = { + addAnomalyCheck_(anomalyDetectionStrategy, analyzer, anomalyCheckConfig) + } + /** * Add a check using Anomaly Detection methods. The Anomaly Detection Strategy only checks * if the new value is an Anomaly. @@ -223,7 +239,7 @@ class VerificationRunBuilderWithRepository( * @param analyzer The analyzer for the metric to run anomaly detection on * @param anomalyCheckConfig Some configuration settings for the Check */ - def addAnomalyCheck( + def addAnomalyCheck_( anomalyDetectionStrategy: AnomalyDetectionStrategy, analyzer: Analyzer[_ <: State[_], Metric[Double]], anomalyCheckConfig: Option[AnomalyCheckConfig] = None)