From f67caf28e85900db9fa2019bee54b3f4a821c140 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Tue, 17 Dec 2019 17:32:48 +0100 Subject: [PATCH 1/2] Default values are now validated/converted as normal values. If the default value is provided in `default=` or as a `default_factory=copy_value()`, this is done only **once per field**, to accelerate future access. If the value was converted on the way, the converted value is used to replace the default value, or the default value copied by the factory. Fixed [#57](https://github.com/smarie/python-pyfields/issues/57) --- pyfields/core.py | 64 +++++++++++++++++++++++++++-- pyfields/helpers.py | 81 ++++++++++++++++++++++++++++--------- pyfields/tests/test_core.py | 75 ++++++++++++++++++++++++++++++++++ 3 files changed, 199 insertions(+), 21 deletions(-) diff --git a/pyfields/core.py b/pyfields/core.py index 159f278..c8180fa 100644 --- a/pyfields/core.py +++ b/pyfields/core.py @@ -870,11 +870,17 @@ def __str__(self): % (self.field.qualname, ) +# default value policies +_NO = None +_NO_BUT_CAN_CACHE_FIRST_RESULT = False +_YES = True + + class DescriptorField(Field): """ General-purpose implementation for fields that require type-checking or validation or converter """ - __slots__ = 'root_validator', 'check_type', 'converters', 'read_only' + __slots__ = 'root_validator', 'check_type', 'converters', 'read_only', '_default_is_safe' @classmethod def create_from_field(cls, @@ -940,6 +946,28 @@ def __init__(self, # read-only self.read_only = read_only + # self._default_is_safe is used to know if we should validate/convert the default value before use + # - None means "always". This is the case when there is a default factory we can't modify + # - False means "once", and then True means "not anymore" (after first validation). This is the case + # when we can modify the default value so that we can replace it with the possibly converted one + if default is not EMPTY: + # a fixed default value is here, we'll validate it once and for all + self._default_is_safe = _NO_BUT_CAN_CACHE_FIRST_RESULT + elif default_factory is not None: + # noinspection PyBroadException + try: + # is this the `copy_value` factory ? + default_factory.set_copied_value + except Exception: + # No: the factory can be anything else + self._default_is_safe = _NO + else: + # Yes: we can replace the value that it uses on first + self._default_is_safe = _NO_BUT_CAN_CACHE_FIRST_RESULT + else: + # no default at all + self._default_is_safe = _NO + def add_validator(self, validator # type: ValidatorDef ): @@ -997,7 +1025,32 @@ def __get__(self, obj, obj_type): value = self.default # nominal initialization on first read: we set the attribute in the object - setattr(obj, private_name, value) + if self._default_is_safe is _YES: + # no need to validate/convert the default value, fast track (use the private name directly) + setattr(obj, private_name, value) + else: + # we need conversion and validation - go through the setter (same as using the public name) + possibly_converted_value = self.__set__(obj, value, _return=True) + + if self._default_is_safe is _NO_BUT_CAN_CACHE_FIRST_RESULT: + # there is a possibility to remember the new default and skip this next time + + # If there was a conversion, use the converted value as the new default + if possibly_converted_value is not value: + if self.is_default_factory: + # Modify the `copy_value` factory + self.default.set_copied_value(possibly_converted_value) + else: + # Modify the value + self.default = possibly_converted_value + # else: + # # no conversion: we can continue to use the same default value, it is valid + # pass + + # mark the default as safe now, so that this is skipped next time + self._default_is_safe = _YES + + return possibly_converted_value return value @@ -1007,7 +1060,8 @@ def trace_convert(self, value, obj=None): def __set__(self, obj, - value # type: T + value, # type: T + _return=False # type: bool ): # do this first, because a field might be referenced from its class the first time it will be used @@ -1091,6 +1145,10 @@ def __set__(self, # set the new value setattr(obj, private_name, value) + # return it for the callers that need it + if _return: + return value + def __delete__(self, obj): # private_name = "_" + self.name delattr(obj, "_" + self.name) diff --git a/pyfields/helpers.py b/pyfields/helpers.py index d2d4ca1..eea8d9c 100644 --- a/pyfields/helpers.py +++ b/pyfields/helpers.py @@ -1,6 +1,7 @@ # Authors: Sylvain Marie # # Copyright (c) Schneider Electric Industries, 2019. All right reserved. +import sys from copy import copy, deepcopy from inspect import getmro @@ -181,27 +182,71 @@ def get_fields(cls, # yield k -def copy_value(val, - deep=True # type: bool - ): - """ - Returns a default value factory to be used in a `field(default_factory=...)`. +if sys.version_info < (3, 7): + # in python < 3.7, closures cannot be modified, we have to use a mutable object (slightly slower) + def copy_value(val, + deep=True # type: bool + ): + """ + Returns a default value factory to be used in a `field(default_factory=...)`. + + That factory will create a copy of the provided `val` everytime it is called. Handy if you wish to use mutable + objects as default values for your fields ; for example lists. + + :param val: the (mutable) value to copy + :param deep: by default deep copies will be created. You can change this behaviour by setting this to `False` + :return: + """ + the_val = [val] + if deep: + def create_default(obj): + return deepcopy(the_val[0]) + else: + def create_default(obj): + return copy(the_val[0]) + + # attach two methods on the function to read and write in the closure + def get_copied_value(): + return the_val[0] + + def set_copied_value(new_default): + the_val[0] = new_default + + create_default.get_copied_value = get_copied_value + create_default.set_copied_value = set_copied_value + return create_default +else: + # in python >= 3.7, closures can be modified + def copy_value(val, + deep=True # type: bool + ): + """ + Returns a default value factory to be used in a `field(default_factory=...)`. + + That factory will create a copy of the provided `val` everytime it is called. Handy if you wish to use mutable + objects as default values for your fields ; for example lists. + + :param val: the (mutable) value to copy + :param deep: by default deep copies will be created. You can change this behaviour by setting this to `False` + :return: + """ + if deep: + def create_default(obj): + return deepcopy(val) + else: + def create_default(obj): + return copy(val) - That factory will create a copy of the provided `val` everytime it is called. Handy if you wish to use mutable - objects as default values for your fields ; for example lists. + # attach two methods on the function to read and write in the closure + def get_copied_value(): + return create_default.__closure__[0].cell_contents - :param val: the (mutable) value to copy - :param deep: by default deep copies will be created. You can change this behaviour by setting this to `False` - :return: - """ - if deep: - def create_default(obj): - return deepcopy(val) - else: - def create_default(obj): - return copy(val) + def set_copied_value(new_default): + create_default.__closure__[0].cell_contents = new_default - return create_default + create_default.get_copied_value = get_copied_value + create_default.set_copied_value = set_copied_value + return create_default def copy_field(field_or_name, # type: Union[str, Field] diff --git a/pyfields/tests/test_core.py b/pyfields/tests/test_core.py index 7878ccf..33a507d 100644 --- a/pyfields/tests/test_core.py +++ b/pyfields/tests/test_core.py @@ -646,3 +646,78 @@ def test_pickle(): serialized = pickle.dumps(f) g = pickle.loads(serialized) assert vars(g) == vars(f) + + +@pytest.mark.parametrize("check_type", [False, True], ids="check_type={}".format) +@pytest.mark.parametrize("default_flavor", ["simple", "copy_value", "factory_function"], ids="use_factory={}".format) +def test_default_validated(default_flavor, check_type): + """ Tests that the default value of a DescriptorField is validated """ + + # --- How the default value is created + def make_def_kwargs(): + if default_flavor == "simple": + def_kwargs = dict(default=0) + elif default_flavor == "copy_value": + def_kwargs = dict(default_factory=copy_value(0)) + elif default_flavor == "factory_function": + def custom_factory(obj): + # important note: this could be something dependent + return 0 + def_kwargs = dict(default_factory=custom_factory) + else: + raise ValueError(default_flavor) + return def_kwargs + + def_kwargs = make_def_kwargs() + + # --- validation can be a type check or a validator + if check_type: + validator = None + else: + # a validator that validates the same thing than the type hint + def validator(x): + return isinstance(x, str) + + # nominal: the converter is used correctly on the default value so this is ok + class Foo(object): + bar = field(type_hint=str, check_type=check_type, validators=validator, converters=str, **def_kwargs) + + # default value check + bar_field = Foo.__dict__['bar'] + if default_flavor == "simple": + assert bar_field.default == 0 + assert bar_field._default_is_safe is False + elif default_flavor == "copy_value": + assert bar_field.default.get_copied_value() == 0 + assert bar_field._default_is_safe is False + elif default_flavor == "factory_function": + assert bar_field._default_is_safe is None + + # instance creation and default value access + f = Foo() + assert f.bar == '0' + + # we can check that the default value is modified + if default_flavor == "simple": + assert bar_field.default == '0' + assert bar_field._default_is_safe is True + elif default_flavor == "copy_value": + assert bar_field.default.get_copied_value() == '0' + assert bar_field._default_is_safe is True + elif default_flavor == "factory_function": + assert bar_field._default_is_safe is None + + # make sure it works fine several times :) + del f.bar + assert f.bar == '0' + g = Foo() + assert g.bar == '0' + + # no converter: does not work, the default value is not valid + def_kwargs = make_def_kwargs() + class Foo(object): + bar = field(type_hint=str, check_type=check_type, validators=validator, **def_kwargs) + + f = Foo() + with pytest.raises(FieldTypeError if check_type else ValidationError): + f.bar From 8fc12f1c1bbd5b0e5c7362b30cdeacd45f808fc0 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 22 Jan 2020 12:32:16 +0100 Subject: [PATCH 2/2] Enabled cythonization --- .travis.yml | 6 ++++- setup.py | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index ba5d731..bbbaca9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -55,7 +55,11 @@ install: script: # - coverage run tests.py - - pip install . + # cythonization step + - python setup.py build_ext --inplace # --compiler=msvc + - python setup.py bdist_wheel # create the wheel including the cython files + - pip install --pre --no-index --find-links dist\ pyfields # install existing wheels including pre-releases +# - pip install . - python -c "import os; os.chdir('..'); import pyfields" # ***tests*** # - coverage run tests.py diff --git a/setup.py b/setup.py index 2fe2527..a35426a 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,15 @@ https://packaging.python.org/en/latest/distributing.html https://github.com/pypa/sampleproject """ +import os +import sys +from glob import glob +from warnings import warn + from six import raise_from from os import path -from setuptools import setup, find_packages +from setuptools import setup, find_packages, Extension # do not delete Extension it might break cythonization? here = path.abspath(path.dirname(__file__)) @@ -47,6 +52,72 @@ # with open(path.join(here, 'VERSION')) as version_file: # VERSION = version_file.read().strip() # OBSOLETES = [] +# from Cython.Distutils import build_ext +# ext_modules = [Extension(module, sources=[module + ".pyx"], +# include_dirs=['path1','path2'], # put include paths here +# library_dirs=[], # usually need your Windows SDK stuff here +# language='c++')] + + +# TODO understand how to do this as an optional thing https://github.com/samuelcolvin/pydantic/pull/548/files +# OR simply fork an independent pyfields_cy project +ext_modules = None +if not any(arg in sys.argv for arg in ['clean', 'check']) and 'SKIP_CYTHON' not in os.environ: + try: + from Cython.Build import cythonize + except ImportError: + warn("Cython not present - not cythonizing pyfields") + else: + # For cython test coverage install with `make build-cython-trace` + compiler_directives = {} + if 'CYTHON_TRACE' in sys.argv: + compiler_directives['linetrace'] = True + # compiler_directives['MD'] = True + os.environ['CFLAGS'] = '-O3' + + # C compilation options: {'language_level': 3, 'compiler_directives': {}, 'include_path': ['.']} + # include_path = '.' + + ext_modules = cythonize( + 'pyfields/*.py', + exclude=['pyfields/tests/*.py', 'pyfields/__init__.py', 'pyfields/_version.py'], + nthreads=int(os.getenv('CYTHON_NTHREADS', 0)), + language_level=3, + compiler_directives=compiler_directives, # todo /MT >> /MD + ) + for e in ext_modules: + # 'py_limited_api': False, + # 'name': 'pyfields.core', + # 'sources': ['pyfields\\core.c'], + # 'include_dirs': [], + # 'define_macros': [], + # 'undef_macros': [], + # 'library_dirs': [], + # 'libraries': [], + # 'runtime_library_dirs': [], + # 'extra_objects': [], + # 'extra_compile_args': [], + # 'extra_link_args': [], + # 'export_symbols': [], + # 'swig_opts': [], + # 'depends': [], + # 'language': None, + # 'optional': None, + # 'np_pythran': False} + # + # See https://docs.microsoft.com/fr-fr/cpp/build/reference/md-mt-ld-use-run-time-library?view=vs-2019 + # I could not find an easier way to use this flac (dynamic linkage to runtime) rather than the default /MT (static) + # but I understood that it was needed by looking at scikit-learn compilations + e.extra_compile_args.append('/MD') + print(vars(e)) + +if 'clean' in sys.argv: + for c_file in glob("pyfields/*.c"): + print("Deleting %s" % c_file) + os.remove(c_file) + for pyd_file in glob("pyfields/*.pyd"): + print("Deleting %s" % pyd_file) + os.remove(pyd_file) setup( name=DISTNAME, @@ -150,4 +221,5 @@ # 'sample=sample:main', # ], # }, + ext_modules=ext_modules, )