From abaa1767608e4dd68b8874191acbd4f1e1d8c726 Mon Sep 17 00:00:00 2001 From: Matt Richards Date: Mon, 3 Feb 2025 22:43:33 +1100 Subject: [PATCH] trial type annotations Signed-off-by: Matt Richards --- pandera/api/dataframe/model.py | 46 ++++++++++++++++++++++++++++------ 1 file changed, 39 insertions(+), 7 deletions(-) diff --git a/pandera/api/dataframe/model.py b/pandera/api/dataframe/model.py index 95c8e835a..3fb89f94c 100644 --- a/pandera/api/dataframe/model.py +++ b/pandera/api/dataframe/model.py @@ -18,8 +18,14 @@ TypeVar, Union, cast, + overload, ) +# TODO hard dependence on pandas and polars, use string forwardref instead? +import pandas as pd # TODO tmp +import polars as pl +from pandera.typing.polars import LazyFrame + from pandera.api.base.model import BaseModel from pandera.api.base.schema import BaseSchema from pandera.api.checks import Check @@ -271,24 +277,50 @@ def to_yaml(cls, stream: Optional[os.PathLike] = None): """ return cls.to_schema().to_yaml(stream) + # The overloads effectively specify check_obj: TDataFrame -> TDataFrame[TDataFrameModel] + # but to do this directly would require higher-kinded typevars (https://github.com/python/typing/issues/548) + + @overload + @classmethod + def validate( + cls: Type[TDataFrameModel], + check_obj: pl.LazyFrame, + head: Optional[int] = None, + tail: Optional[int] = None, + sample: Optional[int] = None, + random_state: Optional[int] = None, + lazy: bool = False, + inplace: bool = False, + ) -> LazyFrame[TDataFrameModel]: ... + + @overload + @classmethod + def validate( + cls: Type[TDataFrameModel], + check_obj: pd.DataFrame, + head: Optional[int] = None, + tail: Optional[int] = None, + sample: Optional[int] = None, + random_state: Optional[int] = None, + lazy: bool = False, + inplace: bool = False, + ) -> DataFrame[TDataFrameModel]: ... 
+ @classmethod @docstring_substitution(validate_doc=BaseSchema.validate.__doc__) def validate( cls: Type[TDataFrameModel], - check_obj: TDataFrame, + check_obj: pd.DataFrame | pl.LazyFrame, head: Optional[int] = None, tail: Optional[int] = None, sample: Optional[int] = None, random_state: Optional[int] = None, lazy: bool = False, inplace: bool = False, - ) -> DataFrameBase[TDataFrameModel]: + ) -> DataFrame[TDataFrameModel] | LazyFrame[TDataFrameModel]: """%(validate_doc)s""" - return cast( - DataFrameBase[TDataFrameModel], - cls.to_schema().validate( - check_obj, head, tail, sample, random_state, lazy, inplace - ), + return cls.to_schema().validate( + check_obj, head, tail, sample, random_state, lazy, inplace ) # TODO: add docstring_substitution using generic class