-
Notifications
You must be signed in to change notification settings - Fork 131
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add pydantic validation
* Add pydantic plugin
* Add tests for pydantic validation and plugin
* Add pydantic to dependencies
* Resolve issues from code review
* Make type hints backward compatible
* Remove `pydantic` constraint for `vaex`
  Note that vaexio/vaex#2384 has been resolved
* Improve pydantic validator test import
* Add docstring to the pydantic check_output
* Add initial pydantic data quality docs
* Fix `pydantic support` title underline
* Fix pydantic strict mode link
* Fix spacing after `code-block`
* Add pydantic plugin details
* Fix double quotes for code references
* Remove name tags
* Add additional docstring example; tweak wording
- Loading branch information
Showing
8 changed files
with
477 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
from typing import Any, Type | ||
|
||
from pydantic import BaseModel, TypeAdapter, ValidationError | ||
|
||
from hamilton.data_quality import base | ||
from hamilton.htypes import custom_subclass_check | ||
|
||
|
||
class PydanticModelValidator(base.BaseDefaultValidator):
    """Pydantic model compatibility validator.

    Note that this validator uses pydantic's strict mode, which does not allow for
    coercion of data. This means that if an object does not exactly match the
    reference type, it will fail validation, regardless of whether it could be
    coerced into the correct type.

    :param model: Pydantic model to validate against
    :param importance: Importance of the validator, possible values "warn" and "fail"
    """

    def __init__(self, model: Type[BaseModel], importance: str):
        super().__init__(importance)
        self.model = model
        # Build the adapter once here so every validate() call reuses it.
        self._model_adapter = TypeAdapter(model)

    @classmethod
    def applies_to(cls, datatype: Type[Type]) -> bool:
        """Return whether this validator can be applied to ``datatype``.

        :param datatype: The declared output type to check.
        :return: True if ``datatype`` passes ``custom_subclass_check`` against
            either ``BaseModel`` or ``dict``.
        """
        # In addition to checking for a subclass of BaseModel, we also check for dict
        # as this is the standard 'de-serialized' format of pydantic models in python
        return custom_subclass_check(datatype, BaseModel) or custom_subclass_check(datatype, dict)

    def description(self) -> str:
        """Return a human-readable description of what this validator checks."""
        return "Validates that the returned object is compatible with the specified pydantic model"

    def validate(self, data: Any) -> base.ValidationResult:
        """Validate ``data`` against the configured model in strict mode.

        :param data: The object to validate.
        :return: A passing ``ValidationResult`` if pydantic accepts ``data``; otherwise
            a failing one whose message is the stringified ``ValidationError`` and whose
            diagnostics carry the error list under ``"model_errors"``.
        """
        try:
            # Currently, validate can not alter the output data, so we must use
            # strict=True. The downside to this is that data that could be coerced
            # into the correct type will fail validation.
            self._model_adapter.validate_python(data, strict=True)
        except ValidationError as e:
            return base.ValidationResult(
                passes=False, message=str(e), diagnostics={"model_errors": e.errors()}
            )
        return base.ValidationResult(
            passes=True,
            message=f"Data passes pydantic check for model {self.model!s}",
        )

    @classmethod
    def arg(cls) -> str:
        """Return the keyword argument name associated with this validator."""
        return "model"

    @classmethod
    def name(cls) -> str:
        """Return the identifier of this validator."""
        return "pydantic_validator"


# Validators defined by this module.
PYDANTIC_VALIDATORS = [PydanticModelValidator]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
from typing import List | ||
|
||
from pydantic import BaseModel | ||
|
||
from hamilton import node | ||
from hamilton.data_quality import base as dq_base | ||
from hamilton.function_modifiers import InvalidDecoratorException | ||
from hamilton.function_modifiers import base as fm_base | ||
from hamilton.function_modifiers import check_output as base_check_output | ||
from hamilton.function_modifiers.validation import BaseDataValidationDecorator | ||
from hamilton.htypes import custom_subclass_check | ||
|
||
|
||
class check_output(BaseDataValidationDecorator):
    def __init__(
        self,
        importance: str = dq_base.DataValidationLevel.WARN.value,
        target: fm_base.TargetType = None,
    ):
        """Specific output-checker for pydantic models. This decorator utilizes the output type of
        the function, which can be any subclass of pydantic.BaseModel. The function output must
        be declared with a type hint.

        :param importance: Importance level (either "warn" or "fail") -- see documentation for check_output for more details.
        :param target: The target of the decorator -- see documentation for check_output for more details.

        Here is an example of how to use this decorator with a function that returns a pydantic model:

        .. code-block:: python

            from hamilton.plugins import h_pydantic
            from pydantic import BaseModel

            class MyModel(BaseModel):
                a: int
                b: float
                c: str

            @h_pydantic.check_output()
            def foo() -> MyModel:
                return MyModel(a=1, b=2.0, c="hello")

        Alternatively, you can return a dictionary from the function (type checkers will probably
        complain about this):

        .. code-block:: python

            from hamilton.plugins import h_pydantic
            from pydantic import BaseModel

            class MyModel(BaseModel):
                a: int
                b: float
                c: str

            @h_pydantic.check_output()
            def foo() -> MyModel:
                return {"a": 1, "b": 2.0, "c": "hello"}

        You can also use pydantic validation through ``function_modifiers.check_output`` by
        providing the model as an argument:

        .. code-block:: python

            from typing import Any

            from hamilton import function_modifiers
            from pydantic import BaseModel

            class MyModel(BaseModel):
                a: int
                b: float
                c: str

            @function_modifiers.check_output(model=MyModel)
            def foo() -> dict[str, Any]:
                return {"a": 1, "b": 2.0, "c": "hello"}

        Note, that because we do not (yet) support modification of the output, the validation is
        performed in strict mode, meaning that no data coercion is performed. For example, the
        following function will *fail* validation:

        .. code-block:: python

            from hamilton.plugins import h_pydantic
            from pydantic import BaseModel

            class MyModel(BaseModel):
                a: int  # Defined as an int

            @h_pydantic.check_output()  # This will fail validation!
            def foo() -> MyModel:
                return {"a": "1"}  # Provided as a string

        (Constructing ``MyModel(a="1")`` inside the function would not demonstrate this:
        pydantic coerces the value while building the instance, before the check runs.)

        For more information about strict mode see the pydantic docs: https://docs.pydantic.dev/latest/concepts/strict_mode/
        """
        super().__init__(target)
        self.importance = importance
        self.target = target

    def get_validators(self, node_to_validate: node.Node) -> List[dq_base.DataValidator]:
        """Build the validators for a node, using its declared output type as the model.

        :param node_to_validate: The node whose output should be validated.
        :return: The validators produced by ``function_modifiers.check_output`` when
            given the node's output type as ``model``.
        :raises InvalidDecoratorException: If the node's declared type does not pass
            ``custom_subclass_check`` against ``BaseModel``.
        """
        output_type = node_to_validate.type
        if not custom_subclass_check(output_type, BaseModel):
            raise InvalidDecoratorException(
                f"Output of function {node_to_validate.name} must be a Pydantic model"
            )
        # Delegate to the generic check_output so validator resolution stays in one place.
        return base_check_output(
            importance=self.importance, model=output_type, target_=self.target
        ).get_validators(node_to_validate)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Additional requirements on top of hamilton...pydantic |
Oops, something went wrong.