Skip to content

Commit

Permalink
Merge pull request #7 from tzoiker/feature/additional-properties
Browse files Browse the repository at this point in the history
Support additionalProperties
  • Loading branch information
tzoiker authored Nov 3, 2023
2 parents 2143888 + 3b38ef1 commit f5ef266
Show file tree
Hide file tree
Showing 10 changed files with 200 additions and 97 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
# Changelog

## 0.1.1

### Features
- Support `additionalProperties` (`dict[str, Model]`)
44 changes: 38 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,9 @@ Supports
* `python>=3.8,<3.12`
* `simdjson>=2,<6` (with caveats)

Does not support complex schemas (yet), e.g.
Does not support complex schemas (supporting them may not be very practical
anyway), e.g.,
* `anyOf` (`Union[Model1, Model2]`)
* `additionalProperties` (`dict[str, Model]`)
* ...

In such cases it will fully (not lazily) load the underlying objects.
Expand Down Expand Up @@ -125,6 +125,38 @@ assert parsed == [
]
```

Example with `additionalProperties`:

<!-- name: test_basic -->
```python
schema = {
"type": "object",
"additionalProperties": {
"$ref": "#/definitions/Model",
},
"definitions": {
"Model": {
"type": "object",
"properties": {
"key": {"type": "integer"},
}
}
}
}

data = json.dumps({
"some": {"key": 0, "other": 1},
"other": {"missing": 2},
})

parsed = loads(data, schema=schema)

assert parsed == {
"some": {"key": 0},
"other": {},
}
```

### <a name="usage_reusing_parser"/> Reusing parser

With re-used simdjson parser **(recommended when used in a single thread,
Expand All @@ -137,10 +169,10 @@ from simdjson import Parser
parser = Parser()
parsed = loads(data, schema=schema, parser=parser)

assert parsed == [
{"key": 0},
{},
]
assert parsed == {
"some": {"key": 0},
"other": {},
}
```

### <a name="usage_pydantic_v1"/> Pydantic v1
Expand Down
73 changes: 42 additions & 31 deletions simdjson_schemaful/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,12 @@
_FuncSet = Callable[..., None]


_CACHE: Dict[str, Schema] = {}

# TODO: handle additionalProperties, for example Dict[str, Model]
# https://json-schema.org/understanding-json-schema/reference/object

# TODO: handle anyOf
# TODO: handle anyOf?


def _get_definition(definitions: Dict[str, Schema], schema: Schema) -> Schema:
ref = schema.get("$ref")
if ref:
definition = _CACHE.get(ref)
if not definition:
definition = definitions[ref.split("/")[-1]]
_CACHE[ref] = definition
return definition
if ref := schema.get("$ref"):
return definitions[ref.split("/")[-1]]
return schema


Expand Down Expand Up @@ -65,7 +55,9 @@ def _process_prop(
return

if (not type_ and not prop_data.get("$ref")) or (
type_ == "object" and not prop_data.get("properties")
type_ == "object"
and not prop_data.get("properties")
and not prop_data.get("additionalProperties")
):
if not isinstance(value, simdjson.Object):
raise ValueError(
Expand Down Expand Up @@ -127,26 +119,45 @@ def _loads( # noqa: C901
)

properties = schema.get("properties", {})
if not properties:
target.update(source.as_dict()) # type: ignore

if properties:
for prop_name, prop_data in properties.items():
value = None
try:
value = source[prop_name]
except KeyError:
continue

_process_prop(
prop_data=prop_data,
prop=prop_name,
value=value,
target=target,
func_set=_set_dict,
definitions=definitions,
queue=queue,
)
continue

for prop_name, prop_data in properties.items():
value = None
try:
value = source[prop_name]
except KeyError:
continue
additional_properties = _get_definition(
definitions=definitions,
schema=schema.get("additionalProperties", {}),
)
if additional_properties:
for prop_name in source.keys():
value = source.get(prop_name)
_process_prop(
prop_data=additional_properties,
prop=prop_name,
value=value,
target=target,
func_set=_set_dict,
definitions=definitions,
queue=queue,
)
continue

_process_prop(
prop_data=prop_data,
prop=prop_name,
value=value,
target=target,
func_set=_set_dict,
definitions=definitions,
queue=queue,
)
target.update(source.as_dict()) # type: ignore

elif type_ == "array":
if not isinstance(source, simdjson.Array):
Expand Down
3 changes: 2 additions & 1 deletion tests/pydantic/v1/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from importlib.util import find_spec
from typing import Optional, Sequence
from typing import Dict, Optional, Sequence

from pydantic import Extra

Expand All @@ -22,6 +22,7 @@ class Model2(BaseModel):
f: float

l2: Model2
l2_model_values: Optional[Dict[str, Model2]]

l1_list: Sequence[Model1]
l1_dict: Optional[Model1]
18 changes: 15 additions & 3 deletions tests/pydantic/v1/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,13 @@ def test_nested_not_an_array():
def test_ok():
data = {
"l1_list": [
{"l2": {"s": "0", "i": 0, "f": 0.0, "other": "value"}},
{
"l2": {"s": "0", "i": 0, "f": 0.0, "other": "value"},
"l2_model_values": {
"some": {"s": "0", "i": 0, "f": 0.0, "other": "value"},
"other": {"s": "1", "i": 1, "f": 1.0, "other": "value"},
},
},
{"l2": {"s": "1", "i": 1, "f": 1.0, "another": "value"}},
],
"l1_dict": {
Expand All @@ -56,15 +62,21 @@ def test_ok():
}
expected = {
"l1_list": [
{"l2": {"s": "0", "i": 0, "f": 0.0}},
{
"l2": {"s": "0", "i": 0, "f": 0.0},
"l2_model_values": {
"some": {"s": "0", "i": 0, "f": 0.0},
"other": {"s": "1", "i": 1, "f": 1.0},
},
},
{"l2": {"s": "1", "i": 1, "f": 1.0}},
],
"l1_dict": {
"l2": {"s": "2", "i": 2, "f": 2.0},
},
}
parsed = ModelNested.parse_raw_simdjson(dumps(data))
assert parsed.dict() == expected
assert parsed.dict(exclude_none=True) == expected


def test_missing_required():
Expand Down
43 changes: 19 additions & 24 deletions tests/pydantic/v1/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,16 @@ def test_union_fail():
parse_raw_simdjson_as(model, data)


def test_dict_model_value_fail():
def test_dict_model_value_ok():
model = Dict[str, Model]
data = dumps({"key": {"some": 0, "value": 1}})
with pytest.raises(
ValidationError,
match=re.escape(
"1 validation error for ParsingModel[Dict[str, tests.pydantic.v1.conftest."
"Model]]\n__root__ -> key -> some\n extra fields not permitted (type=va"
"lue_error.extra)"
),
):
parse_raw_simdjson_as(model, data)
assert parse_raw_simdjson_as(model, data)


def test_nested_dict_model_value_fail():
def test_nested_dict_model_value_ok():
model = List[Dict[str, Model]]
data = dumps([{"key": {"some": 0, "value": 1}}])
with pytest.raises(
ValidationError,
match=re.escape(
"1 validation error for ParsingModel[List[Dict[str, tests.pydantic.v1.conft"
"est.Model]]]\n__root__ -> 0 -> key -> some\n extra fields not permitt"
"ed (type=value_error.extra)"
),
):
parse_raw_simdjson_as(model, data)
assert parse_raw_simdjson_as(model, data)


def test_not_an_object():
Expand Down Expand Up @@ -97,22 +81,33 @@ def test_ok():
data = [
{
"l1_list": [
{"l2": {"s": "0", "i": 0, "f": 0.0, "other": "value"}},
{
"l2": {"s": "0", "i": 0, "f": 0.0, "other": "value"},
"l2_model_values": {
"some": {"s": "0", "i": 0, "f": 0.0, "other": "value"},
"other": {"s": "1", "i": 1, "f": 1.0, "other": "value"},
},
},
{"l2": {"s": "1", "i": 1, "f": 1.0, "another": "value"}},
],
}
]
expected = [
{
"l1_list": [
{"l2": {"s": "0", "i": 0, "f": 0.0}},
{
"l2": {"s": "0", "i": 0, "f": 0.0},
"l2_model_values": {
"some": {"s": "0", "i": 0, "f": 0.0},
"other": {"s": "1", "i": 1, "f": 1.0},
},
},
{"l2": {"s": "1", "i": 1, "f": 1.0}},
],
"l1_dict": None,
}
]
(parsed,) = parse_raw_simdjson_as(List[ModelNested], dumps(data))
assert [parsed.dict()] == expected
assert [parsed.dict(exclude_none=True)] == expected


def test_raw_missing_required():
Expand Down
3 changes: 2 additions & 1 deletion tests/pydantic/v2/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from importlib.util import find_spec
from typing import Optional, Sequence
from typing import Dict, Optional, Sequence

from pydantic import ConfigDict

Expand All @@ -21,6 +21,7 @@ class Model2(BaseModel):
f: float

l2: Model2
l2_model_values: Optional[Dict[str, Model2]] = None

l1_list: Sequence[Model1]
l1_dict: Optional[Model1] = None
18 changes: 15 additions & 3 deletions tests/pydantic/v2/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,13 @@ def test_nested_not_an_array():
def test_ok():
data = {
"l1_list": [
{"l2": {"s": "0", "i": 0, "f": 0.0, "other": "value"}},
{
"l2": {"s": "0", "i": 0, "f": 0.0, "other": "value"},
"l2_model_values": {
"some": {"s": "0", "i": 0, "f": 0.0, "other": "value"},
"other": {"s": "1", "i": 1, "f": 1.0, "other": "value"},
},
},
{"l2": {"s": "1", "i": 1, "f": 1.0, "another": "value"}},
],
"l1_dict": {
Expand All @@ -61,15 +67,21 @@ def test_ok():
}
expected = {
"l1_list": [
{"l2": {"s": "0", "i": 0, "f": 0.0}},
{
"l2": {"s": "0", "i": 0, "f": 0.0},
"l2_model_values": {
"some": {"s": "0", "i": 0, "f": 0.0},
"other": {"s": "1", "i": 1, "f": 1.0},
},
},
{"l2": {"s": "1", "i": 1, "f": 1.0}},
],
"l1_dict": {
"l2": {"s": "2", "i": 2, "f": 2.0},
},
}
parsed = ModelNested.model_validate_simdjson(dumps(data))
assert parsed.model_dump() == expected
assert parsed.model_dump(exclude_none=True) == expected


def test_missing_required():
Expand Down
Loading

0 comments on commit f5ef266

Please sign in to comment.