Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use fastjsonschema instead of jsonschema for validation #146

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .changes/unreleased/Under the Hood-20240601-142012.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Under the Hood
body: Replace jsonschema validation with fastjsonschema validation
time: 2024-06-01T14:20:12.79012-04:00
custom:
Author: gshank
Issue: "145"
42 changes: 34 additions & 8 deletions dbt_common/dataclass_schema.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import ClassVar, cast, get_type_hints, List, Tuple, Dict, Any, Optional
import re
import jsonschema
import fastjsonschema
from dataclasses import fields, Field
from enum import Enum
from datetime import datetime
Expand All @@ -21,8 +21,24 @@
import functools


class ValidationError(jsonschema.ValidationError):
pass
class ValidationError(fastjsonschema.JsonSchemaValueException):
    """Validation failure raised by dataclass schema validation.

    Wraps either a ``fastjsonschema.JsonSchemaValueException`` (whose details
    are copied onto this instance) or any other object, whose ``str()``
    becomes the error message.

    Attributes set on every instance:
        message, msg: the human-readable description (always equal).
        value, name, definition, rule: copied from the wrapped
            ``JsonSchemaValueException``; ``None`` when the wrapped object
            is anything else.
    """

    def __init__(self, exc):
        # The base-class __init__ is deliberately not invoked, as in the
        # original implementation; all attributes are populated here instead.
        if isinstance(exc, fastjsonschema.JsonSchemaValueException):
            # Copy parts of JsonSchemaValueException into ValidationError
            self.message = f"Invalid value '{exc.value}': {exc.message}"
            self.value = exc.value
            self.name = exc.name
            self.definition = exc.definition
            self.rule = exc.rule
        else:
            self.message = str(exc)
            # Mirror the base-class constructor defaults so these attributes
            # always exist instead of raising AttributeError on access.
            self.value = None
            self.name = None
            self.definition = None
            self.rule = None
        # ``msg`` is kept as an alias of ``message``.
        self.msg = self.message

    def __str__(self):
        # If we don't provide our own string handler, it will return the stringified
        # version of the params passed in, i.e. stringified JsonSchemaValueException
        return self.msg


class DateTimeSerialization(SerializationStrategy):
Expand Down Expand Up @@ -92,12 +108,22 @@ def json_schema(cls):
return json_schema

# NOTE(review): this hunk appears to interleave the removed jsonschema-based
# validate() body with the added cached fastjsonschema validator; the diff
# markers were lost, so the lines are left exactly as extracted.
@classmethod
def validate(cls, data):
@functools.lru_cache
def jsonschema_validator(cls):
json_schema = cls.json_schema()
validator = jsonschema.Draft7Validator(json_schema)
error = next(iter(validator.iter_errors(data)), None)
if error is not None:
raise ValidationError.create_from(error) from error
# Signature, per the fastjsonschema documentation:
#   compile(definition, handlers={}, formats={}, use_default=True, use_formats=True)
# use_formats=False because of the Path type in SourcePatch.
# use_default=False because use_default=True would insert all Optional fields
# into the data dictionary.
validator = fastjsonschema.compile(json_schema, use_default=False, use_formats=False)
return validator

@classmethod
def validate(cls, data):
    """Validate ``data`` against this class's JSON schema.

    Args:
        data: the value to check with the validator returned by
            ``cls.jsonschema_validator()``.

    Raises:
        ValidationError: if fastjsonschema rejects ``data``. The original
            ``JsonSchemaValueException`` is attached as ``__cause__``.
    """
    validator = cls.jsonschema_validator()
    try:
        validator(data)
    except fastjsonschema.JsonSchemaValueException as exc:
        # Chain explicitly so tracebacks show the fastjsonschema error as the
        # direct cause rather than as an error raised during handling.
        raise ValidationError(exc) from exc

# This method was copied from hologram. Used in model_config.py and relation.py
@classmethod
Expand Down
10 changes: 4 additions & 6 deletions dbt_common/exceptions/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import os

from dbt_common.constants import SECRET_ENV_PREFIX
from dbt_common.dataclass_schema import ValidationError


def env_secrets() -> List[str]:
Expand Down Expand Up @@ -125,12 +124,11 @@ def process_stack(self):
def validator_error_message(self, exc: builtins.Exception):
"""Given a dbt.dataclass_schema.ValidationError return the relevant parts as a string.

dbt.dataclass_schema.ValidationError is basically a jsonschema.ValidationError)
dbt.dataclass_schema.ValidationError is basically a fastjsonschema.JsonSchemaValueException.
"""
if not isinstance(exc, ValidationError):
return str(exc)
path = "[%s]" % "][".join(map(repr, exc.relative_path))
return f"at path {path}: {exc.message}"
# This used to build the message from exc.relative_path and exc.message, but
# dataclass_schema.ValidationError now produces the full message from str(exc),
# so this isn't necessary any more. Leaving here for compatibility.
return str(exc)

def __str__(self, prefix: str = "! "):
node_string = ""
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ dependencies = [
"agate>=1.7.0,<1.10",
"colorama>=0.3.9,<0.5",
"isodate>=0.6,<0.7",
"fastjsonschema>=2.16,<=2.20",
"jsonschema>=4.0,<5.0",
"Jinja2>=3.1.3,<4",
"mashumaro[msgpack]>=3.9,<4.0",
Expand Down
57 changes: 56 additions & 1 deletion tests/unit/test_dataclass_schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import pytest
from dataclasses import dataclass
from typing import Dict, Optional

from dbt_common.dataclass_schema import dbtClassMixin
from dbt_common.dataclass_schema import dbtClassMixin, ValidationError, StrEnum


@dataclass
Expand Down Expand Up @@ -40,3 +41,57 @@ def test_serialization_context():

obj = MyObject.from_dict(dct)
assert obj.sub_object.name == "testing"


# String-valued enum used as the type of SomeObject.an_enum in the validation test.
class MyEnum(StrEnum):
One = "one"
Two = "two"
Three = "three"


# Fixture for test_validation: one required field (name) plus optional or
# defaulted str, int, enum and bool fields.
@dataclass
class SomeObject(dbtClassMixin):
name: str
an_attr: Optional[str] = None
an_int: int = 1
an_enum: Optional[MyEnum] = None
a_bool: bool = True


def test_validation():
dct = {"name": "testing"}
SomeObject.validate(dct)
# check that use_default is not set in compile method
assert "an_attr" not in dct

dct = {"an_attr": "fubar"}
with pytest.raises(ValidationError) as excinfo:
SomeObject.validate(dct)
# former message: "'name' is a required property"
assert (
excinfo.value.message
== "Invalid value '{'an_attr': 'fubar'}': data must contain ['name'] properties"

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the other error messages look good - essentially saying X is invalid because of Y.

In this case, the value '{'an_attr': 'fubar'}' is invalid because it's missing one or more properties. I think it's probably still helpful to name the invalid value (so users can identify where they need to add the missing properties), so I feel good about this one as well.

)

dct = {"name": "testing", "an_int": "some_str"}
with pytest.raises(ValidationError) as excinfo:
SomeObject.validate(dct)
# former message: "'some_str' is not of type 'integer'"
assert excinfo.value.message == "Invalid value 'some_str': data.an_int must be integer"

# Note: any field with multiple types (such as Optional[...]) will get the
# "cannot be validated by any definition" message.
dct = {"name": "testing", "an_enum": "four"}
with pytest.raises(ValidationError) as excinfo:
SomeObject.validate(dct)
# former message: "'four' is not valid under any of the given schemas"
assert (
excinfo.value.message
== "Invalid value 'four': data.an_enum cannot be validated by any definition"
)

dct = {"name": "testing", "a_bool": "True or False"}
with pytest.raises(ValidationError) as excinfo:
SomeObject.validate(dct)
# former message: "'True or False' is not of type 'boolean'"
assert excinfo.value.message == "Invalid value 'True or False': data.a_bool must be boolean"
16 changes: 16 additions & 0 deletions third-party-stubs/fastjsonschema/__init__.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from .exceptions import (
JsonSchemaDefinitionException as JsonSchemaDefinitionException,
JsonSchemaException as JsonSchemaException,
JsonSchemaValueException as JsonSchemaValueException,
)
from .version import VERSION as VERSION

def validate(
definition, data, handlers=..., formats=..., use_default: bool = ..., use_formats: bool = ...
): ...
def compile(
definition, handlers=..., formats=..., use_default: bool = ..., use_formats: bool = ...
): ...
def compile_to_code(
definition, handlers=..., formats=..., use_default: bool = ..., use_formats: bool = ...
): ...
26 changes: 26 additions & 0 deletions third-party-stubs/fastjsonschema/exceptions.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from _typeshed import Incomplete

SPLIT_RE: Incomplete

class JsonSchemaException(ValueError): ...

class JsonSchemaValueException(JsonSchemaException):
message: Incomplete
value: Incomplete
name: Incomplete
definition: Incomplete
rule: Incomplete
def __init__(
self,
message,
value: Incomplete | None = ...,
name: Incomplete | None = ...,
definition: Incomplete | None = ...,
rule: Incomplete | None = ...,
) -> None: ...
@property
def path(self): ...
@property
def rule_definition(self): ...

class JsonSchemaDefinitionException(JsonSchemaException): ...
1 change: 1 addition & 0 deletions third-party-stubs/fastjsonschema/version.pyi
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
VERSION: str
Loading