Skip to content

Commit

Permalink
🐛 Changed Protocol usage for FailureCaseParser
Browse files Browse the repository at this point in the history
  • Loading branch information
Luanee committed Sep 27, 2024
1 parent b8b45f0 commit ab7566c
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 23 deletions.
54 changes: 36 additions & 18 deletions pandera_report/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,23 +7,16 @@
from .options import QUALITY_STATUS_OPTIONS, QualityStatusOptions


class FailureCaseParser(Protocol):
class FailureCaseParserProtocol(Protocol):
"""
An abstract base class for pandera's failure cases dataframe.
This class defines the basic structure and properties of failure case parsers.
"""

_valid: str
_invalid: str
_none: str

def __init__(self, status: Optional[QualityStatusOptions] = None):
status = status or QUALITY_STATUS_OPTIONS

self._valid = status["valid"]
self._invalid = status["invalid"]
self._none = status["none"]
valid_status: str
invalid_status: str
none_status: str

# pylint: disable=missing-function-docstring
@abc.abstractmethod
Expand All @@ -45,6 +38,31 @@ def create_failure_case(self, column: str, check: str) -> str:
# pylint: enable=missing-function-docstring


class FailureCaseParser(FailureCaseParserProtocol):
    """
    A base class for pandera's failure cases dataframe parsers.

    It stores the quality status labels and provides placeholder
    implementations of the parsing methods, which concrete parsers are
    expected to override.

    Args:
        status (Optional[QualityStatusOptions]): Mapping providing the
            "valid", "invalid" and "none" status labels. When omitted (or
            falsy), QUALITY_STATUS_OPTIONS is used.

    Attributes:
        valid_status (str): The valid quality status.
        invalid_status (str): The invalid quality status.
        none_status (str): The none quality status.
    """

    def __init__(self, status: Optional[QualityStatusOptions] = None):
        # Fall back to the package defaults when no options are supplied.
        status = status or QUALITY_STATUS_OPTIONS

        self.valid_status = status["valid"]
        self.invalid_status = status["invalid"]
        self.none_status = status["none"]

    def parse_failure_cases(self, df: pd.DataFrame, number_of_rows: int) -> tuple[pd.Series, pd.Series]:
        """
        Placeholder to be overridden by subclasses.

        Raises:
            NotImplementedError: Always, on the base class.
        """
        # Raise instead of returning the ``NotImplemented`` sentinel: that
        # constant is reserved for binary special methods, and returning it
        # here would only surface later as an unrelated error in the caller.
        raise NotImplementedError(f"{type(self).__name__} must implement parse_failure_cases()")

    def create_quality_issues_series(self, df: pd.DataFrame) -> pd.Series:
        """
        Placeholder to be overridden by subclasses.

        Raises:
            NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement create_quality_issues_series()")

    def create_quality_status_series(self, series_issues: pd.Series) -> pd.Series:
        """
        Placeholder to be overridden by subclasses.

        Raises:
            NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement create_quality_status_series()")

    def create_failure_case(self, column: str, check: str) -> str:
        """
        Placeholder to be overridden by subclasses.

        Raises:
            NotImplementedError: Always, on the base class.
        """
        raise NotImplementedError(f"{type(self).__name__} must implement create_failure_case()")


class DefaultFailureCaseParser(FailureCaseParser):
"""
A default implementation of the FailureCaseParser abstract class.
Expand All @@ -55,14 +73,14 @@ class DefaultFailureCaseParser(FailureCaseParser):
If not provided, the default quality status options are used.
Attributes:
_valid (str): The valid quality status.
_invalid (str): The invalid quality status.
valid_status (str): The valid quality status.
invalid_status (str): The invalid quality status.
none_status (str): The none quality status.
"""

# pylint: disable=W0246
def __init__(self, status: Optional[QualityStatusOptions] = None):
super().__init__(status)
valid_status: str
invalid_status: str
none_status: str

def parse_failure_cases(self, df: pd.DataFrame, number_of_rows: int):
"""
Expand Down Expand Up @@ -135,7 +153,7 @@ def fill_series_with_none(self, series: pd.Series, number_of_rows: int) -> pd.Se
Returns:
pd.Series: A series filled with the "none" quality status.
"""
return series.reindex(range(number_of_rows), fill_value=self._none)
return series.reindex(range(number_of_rows), fill_value=self.none_status)

def create_quality_status_series(self, series_issues: pd.Series) -> pd.Series:
"""
Expand All @@ -147,4 +165,4 @@ def create_quality_status_series(self, series_issues: pd.Series) -> pd.Series:
Returns:
pd.Series: A series containing quality status based on the issues.
"""
return pd.Series(np.where(series_issues == self._none, self._valid, self._invalid))
return pd.Series(np.where(series_issues == self.none_status, self.valid_status, self.invalid_status))
4 changes: 2 additions & 2 deletions pandera_report/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandera.errors import SchemaError, SchemaErrors

from pandera_report.options import QUALITY_COLUMNS_OPTIONS, QualityColumnsOptions
from pandera_report.parser import DefaultFailureCaseParser, FailureCaseParser
from pandera_report.parser import DefaultFailureCaseParser, FailureCaseParserProtocol


class DataFrameValidator:
Expand All @@ -31,7 +31,7 @@ def __init__(
quality_report: bool = True,
lazy: bool = True,
columns: Optional[QualityColumnsOptions] = None,
parser: Optional[FailureCaseParser] = None,
parser: Optional[FailureCaseParserProtocol] = None,
):
self.quality_report = quality_report
self.lazy = lazy
Expand Down
7 changes: 4 additions & 3 deletions tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from pandera.typing import Series

from pandera_report.options import QualityColumnsOptions
from pandera_report.parser import FailureCaseParser
from pandera_report.parser import FailureCaseParserProtocol
from pandera_report.validator import DataFrameValidator

schema = pa.DataFrameSchema(
Expand Down Expand Up @@ -91,14 +91,15 @@ def test_validator_validate(
quality_report: bool,
lazy: bool,
columns: Optional[QualityColumnsOptions],
parser: Optional[FailureCaseParser],
parser: Optional[FailureCaseParserProtocol],
exception,
request,
):
df = cast(pd.DataFrame, request.getfixturevalue(df_fixture))
org_columns = df.columns.to_list()
validator = DataFrameValidator(quality_report, lazy, columns, parser)

print(validator._parser)
print(validator._parser.__dict__)
with exception:
df = validator.validate(schema, df)

Expand Down

0 comments on commit ab7566c

Please sign in to comment.