Skip to content

Commit

Permalink
refactor: Standardize on JSON Schema Draft 2020-12 to validate stream…
Browse files Browse the repository at this point in the history
… schemas
  • Loading branch information
edgarrmondragon committed Jul 27, 2024
1 parent 433b7fe commit 2ee7181
Show file tree
Hide file tree
Showing 26 changed files with 116 additions and 27 deletions.
53 changes: 52 additions & 1 deletion singer_sdk/_singerlib/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@
if t.TYPE_CHECKING:
from referencing._core import Resolver

# JSON Schema meta keywords, listed without their leading "$". The "$" prefix
# is added when serializing to a dict and expected when parsing from one.
META_KEYS = ["id", "schema"]

# These are keys defined in the JSON Schema spec that do not themselves contain
# schemas (or lists of schemas)
STANDARD_KEYS = [
Expand Down Expand Up @@ -52,6 +57,9 @@ class Schema:
This is because we wanted to expand it with extra STANDARD_KEYS.
"""

id: str | None = None
schema: str | None = None

type: str | list[str] | None = None
default: t.Any | None = None
properties: dict | None = None
Expand Down Expand Up @@ -94,6 +102,10 @@ def to_dict(self) -> dict[str, t.Any]:
if self.__dict__.get(key) is not None:
result[key] = self.__dict__[key]

for key in META_KEYS:
if self.__dict__.get(key) is not None:
result[f"${key}"] = self.__dict__[key]

return result

@classmethod
Expand All @@ -110,7 +122,40 @@ def from_dict(
Returns:
The initialized Schema object.
"""
Example:
>>> data = {
... "$id": "https://example.com/person.schema.json",
... "$schema": "https://json-schema.org/draft/2020-12/schema",
... "title": "Person",
... "type": "object",
... "properties": {
... "firstName": {
... "type": "string",
... "description": "The person's first name.",
... },
... "lastName": {
... "type": "string",
... "description": "The person's last name.",
... },
... "age": {
... "description": "Age in years which must be equal to or greater than zero.",
... "type": "integer",
... "minimum": 0,
... },
... },
... "required": ["firstName", "lastName"],
... }
>>> schema = Schema.from_dict(data)
>>> schema.title
'Person'
>>> schema.properties["firstName"].description
"The person's first name."
>>> schema.properties["age"].minimum
0
>>> schema.schema
'https://json-schema.org/draft/2020-12/schema'
""" # noqa: E501
kwargs = schema_defaults.copy()
properties = data.get("properties")
items = data.get("items")
Expand All @@ -121,9 +166,15 @@ def from_dict(
}
if items is not None:
kwargs["items"] = cls.from_dict(items, **schema_defaults)

for key in STANDARD_KEYS:
if key in data:
kwargs[key] = data[key]

for key in META_KEYS:
if f"${key}" in data:
kwargs[key] = data[f"${key}"]

return cls(**kwargs)


Expand Down
4 changes: 2 additions & 2 deletions singer_sdk/plugin_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from types import MappingProxyType

import click
from jsonschema import Draft7Validator
from jsonschema import Draft202012Validator

from singer_sdk import about, metrics
from singer_sdk.cli import plugin_cli
Expand All @@ -36,7 +36,7 @@

SDK_PACKAGE_NAME = "singer_sdk"

JSONSchemaValidator = extend_validator_with_defaults(Draft7Validator)
JSONSchemaValidator = extend_validator_with_defaults(Draft202012Validator)


class MapperNotInitialized(Exception):
Expand Down
2 changes: 1 addition & 1 deletion singer_sdk/sinks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def __init__(
Raises:
InvalidJSONSchema: If the schema provided from tap or mapper is invalid.
"""
jsonschema_validator = jsonschema.Draft7Validator
jsonschema_validator = jsonschema.Draft202012Validator

super().__init__(schema)
if validate_formats:
Expand Down
2 changes: 2 additions & 0 deletions singer_sdk/testing/suites.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
StreamRecordMatchesStreamSchema,
StreamRecordSchemaMatchesCatalogTest,
StreamReturnsRecordTest,
StreamSchemaIsValidTest,
TapCLIPrintsTest,
TapDiscoveryTest,
TapStreamConnectionTest,
Expand Down Expand Up @@ -72,6 +73,7 @@ class TestSuite(t.Generic[T]):
StreamRecordMatchesStreamSchema,
StreamRecordSchemaMatchesCatalogTest,
StreamReturnsRecordTest,
StreamSchemaIsValidTest,
StreamPrimaryKeysTest,
],
)
Expand Down
29 changes: 26 additions & 3 deletions singer_sdk/testing/tap_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
import typing as t
import warnings

from jsonschema import Draft7Validator
from jsonschema import Draft202012Validator
from jsonschema.exceptions import SchemaError

import singer_sdk.helpers._typing as th
from singer_sdk import Tap
Expand Down Expand Up @@ -71,6 +72,28 @@ def test(self) -> None:
assert "progress_markers" not in final_state, self.message


class StreamSchemaIsValidTest(StreamTestTemplate):
    """Test that a stream's schema is valid."""

    name = "schema_is_valid"

    def test(self) -> None:
        """Run test.

        Checks that the stream's schema is non-empty and that it conforms to
        the JSON Schema Draft 2020-12 meta-schema.

        Raises:
            AssertionError: if schema is empty or is not valid.
        """
        # Read the schema once so the emptiness check and the meta-schema
        # check operate on the same object.
        schema = self.stream.schema
        assert schema

        try:
            # `check_schema` is a classmethod, so no validator instance has to
            # be constructed just to check the schema itself.
            Draft202012Validator.check_schema(schema)
        except SchemaError as e:  # pragma: no cover
            msg = f"Schema is not valid: {e}"
            raise AssertionError(msg) from e


class StreamReturnsRecordTest(StreamTestTemplate):
"""Test that a stream sync returns at least 1 record."""

Expand Down Expand Up @@ -134,9 +157,9 @@ class StreamRecordMatchesStreamSchema(StreamTestTemplate):
def test(self) -> None:
"""Run test."""
schema = self.stream.schema
validator = Draft7Validator(
validator = Draft202012Validator(
schema,
format_checker=Draft7Validator.FORMAT_CHECKER,
format_checker=Draft202012Validator.FORMAT_CHECKER,
)
for record in self.stream_records:
errors = list(validator.iter_errors(record))
Expand Down
11 changes: 11 additions & 0 deletions singer_sdk/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1075,6 +1075,17 @@ def append(self, property: Property) -> None: # noqa: A002
"""
self.wrapped[property.name] = property

@property
def type_dict(self) -> dict:  # type: ignore[override]
    """Get type dictionary.

    Takes the dictionary produced by the parent class and sets its
    ``$schema`` key to the JSON Schema Draft 2020-12 identifier.

    Returns:
        A dictionary describing the type.
    """
    schema_dict = super().type_dict
    schema_dict.update(
        {"$schema": "https://json-schema.org/draft/2020-12/schema"},
    )
    return schema_dict

def __iter__(self) -> t.Iterator[Property]:
"""Iterate all properties of the property list.
Expand Down
4 changes: 3 additions & 1 deletion tests/core/test_jsonschema_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,8 @@ def test_to_json():
"required": [
"test_property"
],
"additionalProperties": false
"additionalProperties": false,
"$schema": "https://json-schema.org/draft/2020-12/schema"
}""",
)

Expand All @@ -172,6 +173,7 @@ def test_any_type(caplog: pytest.LogCaptureFixture):
)
with caplog.at_level(WARNING):
assert schema.to_dict() == {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"type": "object",
"properties": {
"any_type": {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
{"type":"SCHEMA","stream":"continents","schema":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":"object"},"key_properties":["code"]}
{"type":"SCHEMA","stream":"countries","schema":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]},"native":{"type":["string","null"]},"phone":{"type":["string","null"]},"capital":{"type":["string","null"]},"currency":{"type":["string","null"]},"emoji":{"type":["string","null"]},"continent":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":["object","null"]},"languages":{"items":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":"object"},"type":["array","null"]}},"type":"object"},"key_properties":["code"]}
{"type":"SCHEMA","stream":"countries","schema":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]},"native":{"type":["string","null"]},"phone":{"type":["string","null"]},"capital":{"type":["string","null"]},"currency":{"type":["string","null"]},"emoji":{"type":["string","null"]},"continent":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":["object","null"]},"languages":{"items":{"properties":{"code":{"type":["string","null"]},"name":{"type":["string","null"]}},"type":"object"},"type":["array","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":["code"]}
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/aliased_stream.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"aliased_stream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"aliased_stream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"aliased_stream","record":{"email":"[email protected]","count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"aliased_stream","record":{"email":"[email protected]","count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"aliased_stream","record":{"email":"[email protected]","count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/changed_key_properties.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email_hash":{"type":["string","null"]}}},"key_properties":["email_hash"]}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"email_hash":{"type":["string","null"]}},"$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":["email_hash"]}
{"type":"RECORD","stream":"mystream","record":{"email_hash":"c160f8cc69a4f0bf2b0362752353d060"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email_hash":"4b9bb80620f03eb3719e0a061c14283d"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email_hash":"426b189df1e2f359efe6ee90f2d2030f"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/drop_property.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"cc":{"type":["string","null"]}}},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"type":"object","properties":{"cc":{"type":["string","null"]}},"$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"cc":"4201040137208265027"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"cc":"675987782884"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"cc":"502011811259"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/flatten_all.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub__num":{"type":["integer","null"]},"user__sub__custom_obj":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub__num":{"type":["integer","null"]},"user__sub__custom_obj":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":21,"user__id":1,"user__sub__num":1,"user__sub__custom_obj":"obj-hello","user__some_numbers":"[3.14,2.718]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":13,"user__id":2,"user__sub__num":2,"user__sub__custom_obj":"obj-world","user__some_numbers":"[10.32,1.618]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":19,"user__id":3,"user__sub__num":3,"user__sub__custom_obj":"obj-hello","user__some_numbers":"[1.414,1.732]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/flatten_depth_0.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user":{"properties":{"id":{"type":["integer","null"]},"sub":{"properties":{"num":{"type":["integer","null"]},"custom_obj":{"type":["string","null"]}},"type":["object","null"]},"some_numbers":{"items":{"type":["number"]},"type":["array","null"]}},"type":["object","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":21,"user":{"id":1,"sub":{"num":1,"custom_obj":"obj-hello"},"some_numbers":[3.14,2.718]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":13,"user":{"id":2,"sub":{"num":2,"custom_obj":"obj-world"},"some_numbers":[10.32,1.618]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":19,"user":{"id":3,"sub":{"num":3,"custom_obj":"obj-hello"},"some_numbers":[1.414,1.732]}},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
2 changes: 1 addition & 1 deletion tests/snapshots/mapped_stream/flatten_depth_1.jsonl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{"type":"STATE","value":{}}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]}},"type":"object"},"key_properties":[]}
{"type":"SCHEMA","stream":"mystream","schema":{"properties":{"email":{"type":["string","null"]},"count":{"type":["integer","null"]},"user__id":{"type":["integer","null"]},"user__sub":{"type":["string","null"]},"user__some_numbers":{"type":["string","null"]}},"type":"object","$schema":"https://json-schema.org/draft/2020-12/schema"},"key_properties":[]}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":21,"user__id":1,"user__sub":"{\"num\":1,\"custom_obj\":\"obj-hello\"}","user__some_numbers":"[3.14,2.718]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":13,"user__id":2,"user__sub":"{\"num\":2,\"custom_obj\":\"obj-world\"}","user__some_numbers":"[10.32,1.618]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
{"type":"RECORD","stream":"mystream","record":{"email":"[email protected]","count":19,"user__id":3,"user__sub":"{\"num\":3,\"custom_obj\":\"obj-hello\"}","user__some_numbers":"[1.414,1.732]"},"time_extracted":"2022-01-01T00:00:00+00:00"}
Expand Down
Loading

0 comments on commit 2ee7181

Please sign in to comment.