Skip to content

Commit

Permalink
Add fix for `are_complete` (#112)
Browse files: browse the repository at this point in the history
  • Loading branch information
canimus authored Aug 21, 2023
1 parent f36dc09 commit 12ca1a4
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 13 deletions.
10 changes: 3 additions & 7 deletions cuallee/pyspark_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,11 @@ def is_complete(self, rule: Rule):

def are_complete(self, rule: Rule):
"""Validation for non-null values in a group of columns"""
predicate = [F.col(f"`{c}`").isNotNull() for c in rule.column]
predicate = (reduce(operator.add, [F.col(f"`{c}`").isNotNull().cast("integer") for c in rule.column]) == len(rule.column)).cast("integer")
self.compute_instruction = ComputeInstruction(
predicate,
reduce(
operator.add,
[self._sum_predicate_to_integer(p) for p in predicate],
)
/ len(rule.column),
ComputeMethod.OBSERVE,
F.sum(predicate),
ComputeMethod.SELECT,
)
return self.compute_instruction

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "cuallee"
version = "0.4.8"
version = "0.4.9"
authors = [
{ name="Virginie Grosboillot", email="[email protected]" },
{ name="Herminio Vazquez", email="[email protected]"}
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[metadata]
name = cuallee
version = 0.4.8
version = 0.4.9
[options]
packages = find:
8 changes: 4 additions & 4 deletions test/unit/pyspark_dataframe/test_are_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_negative(spark):
assert rs.first().status == "FAIL"
assert rs.first().violations == 1
assert rs.first().pass_threshold == 1.0
assert rs.first().pass_rate == 7 / 8
assert rs.first().pass_rate == 3 / 4


@pytest.mark.parametrize(
Expand All @@ -41,9 +41,9 @@ def test_coverage(spark):
[[0, "zero"], [1, None], [2, "deux"], [3, "trois"]], ["id", "desc"]
)
check = Check(CheckLevel.WARNING, "pytest")
check.are_complete(("id", "desc"), 0.7)
check.are_complete(("id", "desc"), 0.75)
rs = check.validate(df)
assert rs.first().status == "PASS"
assert rs.first().violations == 1
assert rs.first().pass_threshold == 0.7
assert rs.first().pass_rate == 7 / 8
assert rs.first().pass_threshold == 0.75
assert rs.first().pass_rate == 3 / 4

0 comments on commit 12ca1a4

Please sign in to comment.