Skip to content

Commit

Permalink
preconf: faster enum handling
Browse files Browse the repository at this point in the history
  • Loading branch information
Tinche committed Nov 10, 2024
1 parent 735446d commit 877dc4c
Show file tree
Hide file tree
Showing 11 changed files with 279 additions and 26 deletions.
4 changes: 4 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,11 @@ Our backwards-compatibility policy can be found [here](https://github.com/python
- Some `defaultdicts` are now [supported by default](https://catt.rs/en/latest/defaulthooks.html#defaultdicts), and
{func}`cattrs.cols.is_defaultdict` and {func}`cattrs.cols.defaultdict_structure_factory` are exposed through {mod}`cattrs.cols`.
([#519](https://github.com/python-attrs/cattrs/issues/519) [#588](https://github.com/python-attrs/cattrs/pull/588))
- Many preconf converters (_bson_, stdlib JSON, _cbor2_, _msgpack_, _msgspec_, _orjson_, _ujson_) skip unstructuring `int` and `str` enums,
leaving them to the underlying libraries to handle with greater efficiency.
([#598](https://github.com/python-attrs/cattrs/pull/598))
- Literals containing enums are now unstructured properly.
([#598](https://github.com/python-attrs/cattrs/pull/598))
- Replace `cattrs.gen.MappingStructureFn` with `cattrs.SimpleStructureHook[In, T]`.
- Python 3.13 is now supported.
([#543](https://github.com/python-attrs/cattrs/pull/543) [#547](https://github.com/python-attrs/cattrs/issues/547))
Expand Down
13 changes: 11 additions & 2 deletions docs/preconf.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,26 @@

The {mod}`cattrs.preconf` package contains factories for preconfigured converters, specifically adjusted for particular serialization libraries.

For example, to get a converter configured for BSON:
For example, to get a converter configured for _orjson_:

```{doctest}
>>> from cattrs.preconf.bson import make_converter
>>> from cattrs.preconf.orjson import make_converter
>>> converter = make_converter() # Takes the same parameters as the `cattrs.Converter`
```

Converters obtained this way can be customized further, just like any other converter.

For compatibility and performance reasons, these converters are usually configured to unstructure differently than ordinary `Converters`.
A couple of examples:
* the {class}`_orjson_ converter <cattrs.preconf.orjson.OrjsonConverter>` is configured to pass `datetime` instances unstructured since _orjson_ can handle them faster.
* the {class}`_msgspec_ JSON converter <cattrs.preconf.msgspec.MsgspecJsonConverter>` is configured to pass through some dataclasses and _attrs_ classes,
if the output is identical to what normal unstructuring would have produced, since _msgspec_ can handle them faster.

The intended usage is to pass the unstructured output directly to the underlying library,
or use `converter.dumps` which will do it for you.

These converters support all [default hooks](defaulthooks.md)
and the following additional classes and type annotations,
both for structuring and unstructuring:
Expand Down
11 changes: 11 additions & 0 deletions src/cattrs/preconf/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import sys
from datetime import datetime
from enum import Enum
from typing import Any, Callable, TypeVar

from .._compat import is_subclass

if sys.version_info[:2] < (3, 10):
from typing_extensions import ParamSpec
else:
Expand All @@ -25,3 +28,11 @@ def impl(x: Callable[..., T]) -> Callable[P, T]:
return x

return impl


def is_primitive_enum(type: Any, include_bare_enums: bool = False) -> bool:
    """Report whether `type` is an enum class that may be passed through as-is.

    An enum qualifies when it also subclasses `str` or `int`. When
    `include_bare_enums` is true, a "bare" enum also qualifies: one whose
    MRO, after the class itself, is exactly the MRO of `Enum` (no mixin types).

    :param type: The candidate type to inspect.
    :param include_bare_enums: Whether enums without a mixin type also count.
    """
    # Anything that is not an Enum subclass can never qualify.
    if not is_subclass(type, Enum):
        return False

    # str- and int-based enums always qualify.
    if is_subclass(type, (str, int)):
        return True

    # Bare enums qualify only on request: dropping the class itself from its
    # MRO must leave exactly Enum's own MRO.
    return include_bare_enums and type.mro()[1:] == Enum.mro()
8 changes: 7 additions & 1 deletion src/cattrs/preconf/bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@

from ..converters import BaseConverter, Converter
from ..dispatch import StructureHook
from ..fns import identity
from ..strategies import configure_union_passthrough
from . import validate_datetime, wrap
from . import is_primitive_enum, validate_datetime, wrap

T = TypeVar("T")

Expand Down Expand Up @@ -52,6 +53,10 @@ def configure_converter(converter: BaseConverter):
* byte mapping keys are base85-encoded into strings when unstructuring, and reverse
* non-string, non-byte mapping keys are coerced into strings when unstructuring
* a deserialization hook is registered for bson.ObjectId by default
* string and int enums are passed through when unstructuring
.. versionchanged:: 24.2.0
Enums are left to the library to unstructure, speeding them up.
"""

def gen_unstructure_mapping(cl: Any, unstructure_to=None):
Expand Down Expand Up @@ -92,6 +97,7 @@ def gen_structure_mapping(cl: Any) -> StructureHook:
converter.register_structure_hook(datetime, validate_datetime)
converter.register_unstructure_hook(date, lambda v: v.isoformat())
converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v))
converter.register_unstructure_hook_func(is_primitive_enum, identity)


@wrap(BsonConverter)
Expand Down
5 changes: 4 additions & 1 deletion src/cattrs/preconf/cbor2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from cattrs._compat import AbstractSet

from ..converters import BaseConverter, Converter
from ..fns import identity
from ..strategies import configure_union_passthrough
from . import wrap
from . import is_primitive_enum, wrap

T = TypeVar("T")

Expand All @@ -28,13 +29,15 @@ def configure_converter(converter: BaseConverter):
* datetimes are serialized as timestamp floats
* sets are serialized as lists
* string and int enums are passed through when unstructuring
"""
converter.register_unstructure_hook(datetime, lambda v: v.timestamp())
converter.register_structure_hook(
datetime, lambda v, _: datetime.fromtimestamp(v, timezone.utc)
)
converter.register_unstructure_hook(date, lambda v: v.isoformat())
converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v))
converter.register_unstructure_hook_func(is_primitive_enum, identity)
configure_union_passthrough(Union[str, bool, int, float, None, bytes], converter)


Expand Down
8 changes: 7 additions & 1 deletion src/cattrs/preconf/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@

from .._compat import AbstractSet, Counter
from ..converters import BaseConverter, Converter
from ..fns import identity
from ..strategies import configure_union_passthrough
from . import wrap
from . import is_primitive_enum, wrap

T = TypeVar("T")

Expand All @@ -29,8 +30,12 @@ def configure_converter(converter: BaseConverter):
* datetimes are serialized as ISO 8601
* counters are serialized as dicts
* sets are serialized as lists
* string and int enums are passed through when unstructuring
* union passthrough is configured for unions of strings, bools, ints,
floats and None
.. versionchanged:: 24.2.0
Enums are left to the library to unstructure, speeding them up.
"""
converter.register_unstructure_hook(
bytes, lambda v: (b85encode(v) if v else b"").decode("utf8")
Expand All @@ -40,6 +45,7 @@ def configure_converter(converter: BaseConverter):
converter.register_structure_hook(datetime, lambda v, _: datetime.fromisoformat(v))
converter.register_unstructure_hook(date, lambda v: v.isoformat())
converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v))
converter.register_unstructure_hook_func(is_primitive_enum, identity)
configure_union_passthrough(Union[str, bool, int, float, None], converter)


Expand Down
8 changes: 7 additions & 1 deletion src/cattrs/preconf/msgpack.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
from cattrs._compat import AbstractSet

from ..converters import BaseConverter, Converter
from ..fns import identity
from ..strategies import configure_union_passthrough
from . import wrap
from . import is_primitive_enum, wrap

T = TypeVar("T")

Expand All @@ -28,6 +29,10 @@ def configure_converter(converter: BaseConverter):
* datetimes are serialized as timestamp floats
* sets are serialized as lists
* string and int enums are passed through when unstructuring
.. versionchanged:: 24.2.0
Enums are left to the library to unstructure, speeding them up.
"""
converter.register_unstructure_hook(datetime, lambda v: v.timestamp())
converter.register_structure_hook(
Expand All @@ -39,6 +44,7 @@ def configure_converter(converter: BaseConverter):
converter.register_structure_hook(
date, lambda v, _: datetime.fromtimestamp(v, timezone.utc).date()
)
converter.register_unstructure_hook_func(is_primitive_enum, identity)
configure_union_passthrough(Union[str, bool, int, float, None, bytes], converter)


Expand Down
12 changes: 9 additions & 3 deletions src/cattrs/preconf/msgspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,15 @@ def configure_converter(converter: Converter) -> None:
* datetimes and dates are passed through to be serialized as RFC 3339 directly
* enums are passed through to msgspec directly
* union passthrough configured for str, bool, int, float and None
* bare, string and int enums are passed through when unstructuring
.. versionchanged:: 24.2.0
Enums are left to the library to unstructure, speeding them up.
"""
configure_passthroughs(converter)

converter.register_unstructure_hook(Struct, to_builtins)
converter.register_unstructure_hook(Enum, to_builtins)
converter.register_unstructure_hook(Enum, identity)

converter.register_structure_hook(Struct, convert)
converter.register_structure_hook(bytes, lambda v, _: b64decode(v))
Expand All @@ -100,7 +104,7 @@ def configure_passthroughs(converter: Converter) -> None:
converter.register_unstructure_hook(bytes, to_builtins)
converter.register_unstructure_hook_factory(is_mapping, mapping_unstructure_factory)
converter.register_unstructure_hook_factory(is_sequence, seq_unstructure_factory)
converter.register_unstructure_hook_factory(has, attrs_unstructure_factory)
converter.register_unstructure_hook_factory(has, msgspec_attrs_unstructure_factory)
converter.register_unstructure_hook_factory(
is_namedtuple, namedtuple_unstructure_factory
)
Expand Down Expand Up @@ -145,7 +149,9 @@ def mapping_unstructure_factory(type, converter: BaseConverter) -> UnstructureHo
return converter.gen_unstructure_mapping(type)


def attrs_unstructure_factory(type: Any, converter: Converter) -> UnstructureHook:
def msgspec_attrs_unstructure_factory(
type: Any, converter: Converter
) -> UnstructureHook:
"""Choose whether to use msgspec handling or our own."""
origin = get_origin(type)
attribs = fields(origin or type)
Expand Down
8 changes: 7 additions & 1 deletion src/cattrs/preconf/orjson.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from ..converters import BaseConverter, Converter
from ..fns import identity
from ..strategies import configure_union_passthrough
from . import wrap
from . import is_primitive_enum, wrap

T = TypeVar("T")

Expand All @@ -36,9 +36,12 @@ def configure_converter(converter: BaseConverter):
* sets are serialized as lists
* string enum mapping keys have special handling
* mapping keys are coerced into strings when unstructuring
* bare, string and int enums are passed through when unstructuring
.. versionchanged:: 24.1.0
Add support for typed namedtuples.
.. versionchanged:: 24.2.0
Enums are left to the library to unstructure, speeding them up.
"""
converter.register_unstructure_hook(
bytes, lambda v: (b85encode(v) if v else b"").decode("utf8")
Expand Down Expand Up @@ -80,6 +83,9 @@ def key_handler(v):
),
]
)
converter.register_unstructure_hook_func(
partial(is_primitive_enum, include_bare_enums=True), identity
)
configure_union_passthrough(Union[str, bool, int, float, None], converter)


Expand Down
11 changes: 8 additions & 3 deletions src/cattrs/preconf/ujson.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@

from ujson import dumps, loads

from cattrs._compat import AbstractSet

from .._compat import AbstractSet
from ..converters import BaseConverter, Converter
from ..fns import identity
from ..strategies import configure_union_passthrough
from . import wrap
from . import is_primitive_enum, wrap

T = TypeVar("T")

Expand All @@ -30,6 +30,10 @@ def configure_converter(converter: BaseConverter):
* bytes are serialized as base64 strings
* datetimes are serialized as ISO 8601
* sets are serialized as lists
* string and int enums are passed through when unstructuring
.. versionchanged:: 24.2.0
Enums are left to the library to unstructure, speeding them up.
"""
converter.register_unstructure_hook(
bytes, lambda v: (b85encode(v) if v else b"").decode("utf8")
Expand All @@ -40,6 +44,7 @@ def configure_converter(converter: BaseConverter):
converter.register_structure_hook(datetime, lambda v, _: datetime.fromisoformat(v))
converter.register_unstructure_hook(date, lambda v: v.isoformat())
converter.register_structure_hook(date, lambda v, _: date.fromisoformat(v))
converter.register_unstructure_hook_func(is_primitive_enum, identity)
configure_union_passthrough(Union[str, bool, int, float, None], converter)


Expand Down
Loading

0 comments on commit 877dc4c

Please sign in to comment.