diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..5c15934 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +40e5de5211ff6967c6e14f47fd62b3add5997d5f diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b78365c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +ro_json/_static_version.py export-subst \ No newline at end of file diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml new file mode 100644 index 0000000..33906e9 --- /dev/null +++ b/.github/workflows/pypi.yaml @@ -0,0 +1,41 @@ +# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ +name: Publish Python 🐍 distributions πŸ“¦ to PyPI and TestPyPI + +on: + push: + tags: + - '*' + +permissions: + id-token: write + contents: read + +jobs: + build-n-publish: + name: Build and Publish Python 🐍 Distributions πŸ“¦ to PyPI and TestPyPI + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + # https://github.com/actions/checkout#fetch-all-history-for-all-tags-and-branches + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install build + run: python -m pip install --upgrade pip build + + - name: Build Distributions + run: python -m build --sdist --wheel --outdir dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + print-hash: true + verify-metadata: true diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3b84922..d9fde03 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,5 +1,4 @@ - -name: 'pyjson-tricks' +name: Testing on: push: @@ -9,71 +8,54 @@ on: jobs: build: - name: tests + name: Tests on Python ${{ matrix.python-version }} runs-on: ubuntu-latest strategy: + matrix: + python-version: ['3.10', '3.11', 
'3.12'] + include: + - python-version: '3.10' + with-numpy: true + - python-version: '3.11' + with-numpy: false + - python-version: '3.12' + with-numpy: true + max-parallel: 8 fail-fast: false - matrix: - libraries: [ - 'vanilla', - 'tz', - 'path', - 'numpy', - 'pandas', - 'all' - ] - python-version: [ - '3.7', - '3.8', - '3.9', - '3.10', - '3.11' - ] + steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 + - uses: proudust/gh-describe@v2 + # id needed to generate the outputs + id: ghd + - name: Check outputs + run: | + echo "describe : ${{ steps.ghd.outputs.describe }}" + echo "tag : ${{ steps.ghd.outputs.tag }}" + echo "distance : ${{ steps.ghd.outputs.distance }}" + echo "sha : ${{ steps.ghd.outputs.sha }}" + echo "short-sha : ${{ steps.ghd.outputs.short-sha }}" - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | + export RO_JSON_GIT_DESCRIBE=${{ steps.ghd.outputs.describe }} python -m pip install --upgrade pip - pip install pytest - if [ "${{ matrix.python-version }}" == "2.7" ] ; then - pip install enum34 - fi - export LIBS="${{ matrix.libraries }}" - if [ "$LIBS" == "tz" ] || [ "$LIBS" == "all" ] ; then - pip install pytz - fi - if [ "$LIBS" == "path" ] || [ "$LIBS" == "all" ] ; then - pip install pathlib - fi - if [ "$LIBS" == "numpy" ] || [ "$LIBS" == "all" ] ; then - pip install numpy - fi - if [ "$LIBS" == "pandas" ] || [ "$LIBS" == "all" ] ; then - pip install pandas + pip install setuptools pytest pathlib + if [ "${{ matrix.with-numpy }}" = "true" ]; then + pip install pytz numpy pandas fi + + - name: Install package + run: | + export RO_JSON_GIT_DESCRIBE=${{ steps.ghd.outputs.describe }} + python -m pip install . 
-vvv + - name: Run tests run: | + export RO_JSON_GIT_DESCRIBE=${{ steps.ghd.outputs.describe }} python --version - PYTEST_ARGS='-v --strict tests/test_bare.py tests/test_class.py tests/test_meta.py tests/test_enum.py' - export LIBS="${{ matrix.libraries }}" - if [ "$LIBS" == "vanilla" ] ; then - py.test $PYTEST_ARGS - elif [ "$LIBS" == "tz" ] ; then - py.test $PYTEST_ARGS tests/test_tz.py - elif [ "$LIBS" == "path" ] ; then - py.test $PYTEST_ARGS tests/test_pathlib.py - elif [ "$LIBS" == "numpy" ] ; then - py.test $PYTEST_ARGS tests/test_np.py - elif [ "$LIBS" == "pandas" ] ; then - py.test $PYTEST_ARGS tests/test_pandas.py - elif [ "$LIBS" == "all" ] ; then - py.test -v --strict - else - echo "UNKNOWN LIBRARY '$LIBS'" - exit 1 - fi + pytest -v --strict diff --git a/README.md b/README.md index e05c663..9464586 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,12 @@ -# JSON tricks (python) +> [!NOTE] -The [pyjson-tricks] package brings several pieces of +>The primary reason for this fork is to enable full round-trip serialization and deserialization of NumPy scalars and 0-dimensional arrays to JSON and back. This feature is essential for applications that require precise data preservation when working with NumPy data types. + +Despite contributing this enhancement to the original project (see [Pull Request #99](https://github.com/mverleg/pyjson_tricks/pull/99)), there was a difference in opinion with the maintainer regarding its inclusion. As a result, this fork aims to continue development with this functionality integrated. + +# ro_json + +The [ro-json] package brings several pieces of functionality to python handling of json files: 1. **Store and load numpy arrays** in human-readable format. @@ -13,9 +19,9 @@ functionality to python handling of json files: As well as compression and disallowing duplicate keys. 
-* Code: -* Documentation: -* PIP: +* Code: + +* PIP: Several keys of the format `__keyname__` have special meanings, and more might be added in future releases. @@ -31,7 +37,7 @@ Thanks for all the Github stars⭐! You can install using ``` bash -pip install json-tricks +pip install ro_json ``` Decoding of some data types needs the corresponding package to be @@ -42,7 +48,7 @@ You can import the usual json functions dump(s) and load(s), as well as a separate comment removal function, as follows: ``` bash -from json_tricks import dump, dumps, load, loads, strip_comments +from ro_json import dump, dumps, load, loads, strip_comments ``` The exact signatures of these and other functions are in the [documentation](http://json-tricks.readthedocs.org/en/latest/#main-components). @@ -75,7 +81,7 @@ this yields: ``` which will be converted back to a numpy array when using -`json_tricks.loads`. Note that the memory order (`Corder`) is only +`ro_json.loads`. Note that the memory order (`Corder`) is only stored in v3.1 and later and for arrays with at least 2 dimensions. As you see, this uses the magic key `__ndarray__`. Don't use @@ -87,9 +93,9 @@ closest python primitive type. A special representation was not feasible, because Python's json implementation serializes some numpy types as primitives, without consulting custom encoders. If you want to preserve the exact numpy type, use -[encode_scalars_inplace](https://json-tricks.readthedocs.io/en/latest/#json_tricks.np_utils.encode_scalars_inplace). +[encode_scalars_inplace](https://json-tricks.readthedocs.io/en/latest/#ro_json.np_utils.encode_scalars_inplace). -There is also a compressed format (thanks `claydugo` for fix). From +There is also a compressed format (thanks `claydugo` for fix). From the next major release, this will be default when using compression. 
For now, you can use it as: @@ -122,14 +128,14 @@ dumps(data, compression=False, properties={'ndarray_compact': 8}) ## Class instances -`json_tricks` can serialize class instances. +`ro_json` can serialize class instances. If the class behaves normally (not generated dynamic, no `__new__` or `__metaclass__` magic, etc) *and* all it's attributes are serializable, then this should work by default. ``` python -# json_tricks/test_class.py +# ro_json/test_class.py class MyTestCls: def __init__(self, **kwargs): for k, v in kwargs.items(): @@ -146,7 +152,7 @@ You'll get your instance back. Here the json looks like this: ``` javascript { "__instance_type__": [ - "json_tricks.test_class", + "ro_json.test_class", "MyTestCls" ], "attributes": { @@ -211,7 +217,7 @@ Date, time, datetime and timedelta objects are stored as dictionaries of "day", "hour", "millisecond" etc keys, for each nonzero property. Timezone name is also stored in case it is set, as is DST (thanks `eumir`). -You'll need to have `pytz` installed to use timezone-aware date/times, +You'll need to have `pytz` installed to use timezone-aware date/times, it's not needed for naive date/times. ``` javascript @@ -253,7 +259,7 @@ ordered = OrderedDict(( Converting to json and back will preserve the order: ``` python -from json_tricks import dumps, loads +from ro_json import dumps, loads json = dumps(ordered) ordered = loads(json, preserve_order=True) ``` @@ -303,12 +309,12 @@ Since comments aren't stored in the Python representation of the data, loading and then saving a json file will remove the comments (it also likely changes the indentation). -The implementation of comments is a bit crude, which means that there are +The implementation of comments is a bit crude, which means that there are some exceptional cases that aren't handled correctly ([#57](https://github.com/mverleg/pyjson_tricks/issues/57)). -It is also not very fast. 
For that reason, if `ignore_comments` wasn't -explicitly set to True, then json-tricks first tries to parge without -ignoring comments. If that fails, then it will automatically re-try +It is also not very fast. For that reason, if `ignore_comments` wasn't +explicitly set to True, then ro_json first tries to parse without +ignoring comments. If that fails, then it will automatically re-try with comment handling. This makes the no-comment case faster at the cost of the comment case, so if you are expecting comments make sure to set `ignore_comments` to True. @@ -328,10 +334,10 @@ of the comment case, so if you are expecting comments make sure to set * Save and load `Enum` (thanks to `Jenselme`), either built-in in python3.4+, or with the [enum34](https://pypi.org/project/enum34/) package in earlier versions. `IntEnum` needs - [encode_intenums_inplace](https://json-tricks.readthedocs.io/en/latest/#json_tricks.utils.encode_intenums_inplace). -* `json_tricks` allows for gzip compression using the + [encode_intenums_inplace](https://json-tricks.readthedocs.io/en/latest/#ro_json.utils.encode_intenums_inplace). +* `ro_json` allows for gzip compression using the `compression=True` argument (off by default). -* `json_tricks` can check for duplicate keys in maps by setting +* `ro_json` can check for duplicate keys in maps by setting `allow_duplicates` to False. These are [kind of allowed](http://stackoverflow.com/questions/21832701/does-json-syntax-allow-duplicate-keys-in-an-object), but are handled inconsistently between json implementations. In @@ -340,8 +346,8 @@ of the comment case, so if you are expecting comments make sure to set * Save and load `pathlib.Path` objects (e.g., the current path, `Path('.')`, serializes as `{"__pathlib__": "."}`) (thanks to `bburan`). -* Save and load bytes (python 3+ only), which will be encoded as utf8 if - that is valid, or as base64 otherwise. 
Base64 is always used if +* Save and load bytes (python 3+ only), which will be encoded as utf8 if + that is valid, or as base64 otherwise. Base64 is always used if primitives are requested. Serialized as `[{"__bytes_b64__": "aGVsbG8="}]` vs `[{"__bytes_utf8__": "hello"}]`. * Save and load slices (thanks to `claydugo`). @@ -349,7 +355,7 @@ of the comment case, so if you are expecting comments make sure to set # Preserve type vs use primitive By default, types are encoded such that they can be restored to their -original type when loaded with `json-tricks`. Example encodings in this +original type when loaded with `ro-json`. Example encodings in this documentation refer to that format. You can also choose to store things as their closest primitive type @@ -455,9 +461,9 @@ print(dumps(data, primitives=True)) ] ``` -Note that valid json is produced either way: ``json-tricks`` stores meta data as normal json, but other packages probably won't interpret it. +Note that valid json is produced either way: ``ro_json`` stores meta data as normal json, but other packages probably won't interpret it. -Note that valid json is produced either way: `json-tricks` stores meta +Note that valid json is produced either way: `ro_json` stores meta data as normal json, but other packages probably won't interpret it. # Usage & contributions @@ -477,6 +483,4 @@ Contributors not yet mentioned: `janLo` (performance boost). Tests are run automatically for commits to the repository for all supported versions. This is the status: -![image](https://github.com/mverleg/pyjson_tricks/workflows/pyjson-tricks/badge.svg?branch=master) - -To run the tests manually for your version, see [this guide](tests/run_locally.md). \ No newline at end of file +To run the tests manually for your version, see [this guide](tests/run_locally.md). 
diff --git a/docs/index.rst b/docs/index.rst index dc4f7c1..78359ff 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,30 +9,30 @@ Support for numpy, pandas and other libraries should work automatically if those dumps +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.nonp.dumps +.. autofunction:: ro_json.nonp.dumps -.. autofunction:: json_tricks.np.dumps +.. autofunction:: ro_json.np.dumps dump +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.nonp.dump +.. autofunction:: ro_json.nonp.dump -.. autofunction:: json_tricks.np.dump +.. autofunction:: ro_json.np.dump loads +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.nonp.loads +.. autofunction:: ro_json.nonp.loads -.. autofunction:: json_tricks.np.loads +.. autofunction:: ro_json.np.loads load +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.nonp.load +.. autofunction:: ro_json.nonp.load -.. autofunction:: json_tricks.np.load +.. autofunction:: ro_json.np.load Utilities --------------------------------------- @@ -40,19 +40,19 @@ Utilities strip comments +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.comment.strip_comments +.. autofunction:: ro_json.comment.strip_comments numpy +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.np.numpy_encode +.. autofunction:: ro_json.np.numpy_encode -.. autofunction:: json_tricks.np.json_numpy_obj_hook +.. autofunction:: ro_json.np.json_numpy_obj_hook class instances +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.encoders.class_instance_encode +.. autofunction:: ro_json.encoders.class_instance_encode .. autoclass:: json_tricks.decoders.ClassInstanceHook @@ -61,20 +61,20 @@ enum instances Support for enums was added in Python 3.4. Support for previous versions of Python is available with the `enum 34`_ package. -.. autofunction:: json_tricks.encoders.enum_instance_encode +.. 
autofunction:: ro_json.encoders.enum_instance_encode .. autoclass:: json_tricks.decoders.EnumInstanceHook By default ``IntEnum`` cannot be encoded as enums since they cannot be differenciated from integers. To serialize them, you must use `encode_intenums_inplace` which mutates a nested data structure (in place!) to replace any ``IntEnum`` by their representation. If you serialize this result, it can subsequently be loaded without further adaptations. -.. autofunction:: json_tricks.utils.encode_intenums_inplace +.. autofunction:: ro_json.utils.encode_intenums_inplace date/time +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.encoders.json_date_time_encode +.. autofunction:: ro_json.encoders.json_date_time_encode -.. autofunction:: json_tricks.decoders.json_date_time_hook +.. autofunction:: ro_json.decoders.json_date_time_hook numpy scalars +++++++++++++++++++++++++++++++++++++++ @@ -85,7 +85,7 @@ So if you really want to encode numpy scalars, you'll have to do the conversion It's not great, but unless the Python json module changes, it's the best that can be done. See `issue 18`_ for more details. -.. autofunction:: json_tricks.np_utils.encode_scalars_inplace +.. 
autofunction:: ro_json.np_utils.encode_scalars_inplace Table of content --------------------------------------- diff --git a/json_tricks/_version.py b/json_tricks/_version.py deleted file mode 100644 index 8b30903..0000000 --- a/json_tricks/_version.py +++ /dev/null @@ -1,3 +0,0 @@ - -VERSION = '3.17.2' - diff --git a/json_tricks/__init__.py b/ro_json/__init__.py similarity index 96% rename from json_tricks/__init__.py rename to ro_json/__init__.py index af85f1b..5a0cff2 100644 --- a/json_tricks/__init__.py +++ b/ro_json/__init__.py @@ -13,10 +13,6 @@ numeric_types_hook, ClassInstanceHook, json_set_hook, pandas_hook, nopandas_hook, json_numpy_obj_hook, \ json_nonumpy_obj_hook, pathlib_hook, json_bytes_hook from .nonp import dumps, dump, loads, load -from ._version import VERSION - -__version__ = VERSION - try: # find_module takes just as long as importing, so no optimization possible @@ -30,3 +26,4 @@ # from .np_utils import encode_scalars_inplace +from ._version import __version__ diff --git a/ro_json/_static_version.py b/ro_json/_static_version.py new file mode 100644 index 0000000..5557f9b --- /dev/null +++ b/ro_json/_static_version.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# This file is part of 'miniver': https://github.com/jbweston/miniver +# +# This file will be overwritten by setup.py when a source or binary +# distribution is made. The magic value "__use_git__" is interpreted by +# version.py. 
+ +version = "__use_git__" + +# These values are only set if the distribution was created with 'git archive' +refnames = "$Format:%D$" +git_hash = "$Format:%h$" diff --git a/ro_json/_version.py b/ro_json/_version.py new file mode 100644 index 0000000..633703d --- /dev/null +++ b/ro_json/_version.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +# This file is part of 'miniver': https://github.com/jbweston/miniver +# +from collections import namedtuple +import os +import subprocess + +from setuptools.command.build_py import build_py as build_py_orig +from setuptools.command.sdist import sdist as sdist_orig + +Version = namedtuple("Version", ("release", "dev", "labels")) + +# No public API +__all__ = [] + +package_root = os.path.dirname(os.path.realpath(__file__)) +package_name = os.path.basename(package_root) + +STATIC_VERSION_FILE = "_static_version.py" + + +def get_version(version_file=STATIC_VERSION_FILE): + version_info = get_static_version_info(version_file) + version = version_info["version"] + if version == "__use_git__": + version = get_version_from_git() + if not version: + version = get_version_from_git_archive(version_info) + if not version: + version = Version("unknown", None, None) + return pep440_format(version) + else: + return version + + +def get_static_version_info(version_file=STATIC_VERSION_FILE): + version_info = {} + with open(os.path.join(package_root, version_file), "rb") as f: + exec(f.read(), {}, version_info) + return version_info + + +def version_is_from_git(version_file=STATIC_VERSION_FILE): + return get_static_version_info(version_file)["version"] == "__use_git__" + + +def pep440_format(version_info): + release, dev, labels = version_info + + version_parts = [release] + if dev: + if release.endswith("-dev") or release.endswith(".dev"): + version_parts.append(dev) + else: # prefer PEP440 over strict adhesion to semver + version_parts.append(".post{}".format(dev)) + + if labels: + version_parts.append("+") + 
version_parts.append(".".join(labels)) + + return "".join(version_parts) + + +def get_version_from_git(): + # git describe --first-parent does not take into account tags from branches + # that were merged-in. The '--long' flag gets us the 'dev' version and + # git hash, '--always' returns the git hash even if there are no tags. + for opts in [["--first-parent"], []]: + try: + p = subprocess.Popen( + ["git", "describe", "--tags", "--long", "--always"] + opts, + cwd=package_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + except OSError: + return + if p.wait() == 0: + break + else: + return + + if os.environ.get("RO_JSON_GIT_DESCRIBE", None): + git_describe = os.environ["RO_JSON_GIT_DESCRIBE"] + else: + git_describe = p.communicate()[0].decode() + + description = ( + git_describe + .strip("v") # Tags can have a leading 'v', but the version should not + .rstrip("\n") + .rsplit("-", 2) # Split the latest tag, commits since tag, and hash + ) + + try: + release, dev, git = description + except ValueError: # No tags, only the git hash + # prepend 'g' to match with format returned by 'git describe' + git = "g{}".format(*description) + release = "unknown" + dev = None + + labels = [] + if dev == "0": + dev = None + else: + labels.append(git) + + try: + p = subprocess.Popen(["git", "diff", "--quiet"], cwd=package_root) + except OSError: + labels.append("confused") # This should never happen. + else: + if p.wait() == 1: + labels.append("dirty") + + return Version(release, dev, labels) + + +# TODO: change this logic when there is a git pretty-format +# that gives the same output as 'git describe'. +# Currently we can only tell the tag the current commit is +# pointing to, or its hash (with no version info) +# if it is not tagged. +def get_version_from_git_archive(version_info): + try: + refnames = version_info["refnames"] + git_hash = version_info["git_hash"] + except KeyError: + # These fields are not present if we are running from an sdist. 
+ # Execution should never reach here, though + return None + + if git_hash.startswith("$Format") or refnames.startswith("$Format"): + # variables not expanded during 'git archive' + return None + + VTAG = "tag: v" + refs = set(r.strip() for r in refnames.split(",")) + version_tags = set(r[len(VTAG) :] for r in refs if r.startswith(VTAG)) + if version_tags: + release, *_ = sorted(version_tags) # prefer e.g. "2.0" over "2.0rc1" + return Version(release, dev=None, labels=None) + else: + return Version("unknown", dev=None, labels=["g{}".format(git_hash)]) + + +__version__ = get_version() + + +# The following section defines a 'get_cmdclass' function +# that can be used from setup.py. The '__version__' module +# global is used (but not modified). + + +def _write_version(fname): + # This could be a hard link, so try to delete it first. Is there any way + # to do this atomically together with opening? + try: + os.remove(fname) + except OSError: + pass + with open(fname, "w") as f: + f.write( + "# This file has been created by setup.py.\n" + "version = '{}'\n".format(__version__) + ) + + +def get_cmdclass(pkg_source_path): + class _build_py(build_py_orig): + def run(self): + super().run() + + src_marker = "".join(["src", os.path.sep]) + + if pkg_source_path.startswith(src_marker): + path = pkg_source_path[len(src_marker):] + else: + path = pkg_source_path + _write_version( + os.path.join( + self.build_lib, path, STATIC_VERSION_FILE + ) + ) + + class _sdist(sdist_orig): + def make_release_tree(self, base_dir, files): + super().make_release_tree(base_dir, files) + _write_version( + os.path.join(base_dir, pkg_source_path, STATIC_VERSION_FILE) + ) + + return dict(sdist=_sdist, build_py=_build_py) diff --git a/json_tricks/comment.py b/ro_json/comment.py similarity index 100% rename from json_tricks/comment.py rename to ro_json/comment.py diff --git a/json_tricks/decoders.py b/ro_json/decoders.py similarity index 89% rename from json_tricks/decoders.py rename to 
ro_json/decoders.py index 63aa090..ad9e8b0 100644 --- a/json_tricks/decoders.py +++ b/ro_json/decoders.py @@ -6,7 +6,7 @@ from decimal import Decimal from fractions import Fraction -from json_tricks import NoEnumException, NoPandasException, NoNumpyException +from ro_json import NoEnumException, NoPandasException, NoNumpyException from .utils import ClassInstanceHookBase, nested_index, str_type, gzip_decompress, filtered_wrapper @@ -159,6 +159,13 @@ def slice_hook(dct): return dct return slice(dct['start'], dct['stop'], dct['step']) +def range_hook(dct): + if not isinstance(dct, dict): + return dct + if not '__range__' in dct: + return dct + return range(dct['start'], dct['stop'], dct['step']) + class EnumInstanceHook(ClassInstanceHookBase): """ @@ -275,8 +282,16 @@ def json_numpy_obj_hook(dct): """ if not isinstance(dct, dict): return dct - if not '__ndarray__' in dct: + if '__ndarray__' not in dct: return dct + if 'shape' not in dct or (dct['shape'] == [] and not dct.get('0dim', False)): + # New style scalar encoding + return _decode_numpy_scalar(dct) + else: + return _decode_ndarray(dct) + + +def _decode_ndarray(dct): try: import numpy except ImportError: @@ -297,7 +312,32 @@ def json_numpy_obj_hook(dct): else: return _lists_of_numbers_to_ndarray(data_json, order, shape, nptype) else: - return _scalar_to_numpy(data_json, nptype) + # This code path is mostly for 0-dimensional arrays + # numpy scalars are separately decoded + return numpy.asarray( + data_json, + dtype=nptype + ).reshape(dct['shape']) + + +def _decode_numpy_scalar(dct): + try: + import numpy + except ImportError: + raise NoNumpyException('Trying to decode a map which appears to represent a numpy ' + 'scalar, but numpy appears not to be installed.') + + # numpy.asarray will handle dtypes with units well (such as datetime64) + arr = numpy.asarray(dct['__ndarray__'], dtype=dct['dtype']) + + # https://numpy.org/doc/stable/reference/arrays.scalars.html#indexing + # 
https://numpy.org/doc/stable/user/basics.indexing.html#detailed-notes + # > An empty (tuple) index is a full scalar index into a zero-dimensional + # array. x[()] returns a scalar if x is zero-dimensional and a view + # otherwise. On the other hand, x[...] always returns a view. + + scalar = arr[()] + return scalar def _bin_str_to_ndarray(data, order, shape, np_type_name, data_endianness): @@ -354,15 +394,6 @@ def _lists_of_obj_to_ndarray(data, order, shape, dtype): return arr -def _scalar_to_numpy(data, dtype): - """ - From scalar value to numpy type. - """ - import numpy as nptypes - dtype = getattr(nptypes, dtype) - return dtype(data) - - def json_nonumpy_obj_hook(dct): """ This hook has no effect except to check if you're trying to decode numpy arrays without support, and give you a useful message. diff --git a/json_tricks/encoders.py b/ro_json/encoders.py similarity index 70% rename from json_tricks/encoders.py rename to ro_json/encoders.py index c5c3213..32094c6 100644 --- a/json_tricks/encoders.py +++ b/ro_json/encoders.py @@ -5,6 +5,7 @@ from fractions import Fraction from functools import wraps from json import JSONEncoder +from json.encoder import encode_basestring_ascii, encode_basestring, INFINITY import sys from .utils import hashodict, get_module_name_from_object, NoEnumException, NoPandasException, \ @@ -81,6 +82,54 @@ def default(self, obj, *args, **kwargs): type(obj), self.__class__.__name__, ', '.join(str(encoder) for encoder in self.obj_encoders))) return obj + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. 
+ + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + + def floatstr(o, allow_nan=self.allow_nan, + _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on the + # internals. + + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + def json_date_time_encode(obj, primitives=False): """ @@ -298,6 +347,20 @@ def slice_encode(obj, primitives=False): ('step', obj.step), )) +def range_encode(obj, primitives=False): + if not isinstance(obj, range): + return obj + + if primitives: + return [obj.start, obj.stop, obj.step] + else: + return hashodict(( + ('__range__', True), + ('start', obj.start), + ('stop', obj.stop), + ('step', obj.step), + )) + class ClassInstanceEncoder(JSONEncoder): """ See `class_instance_encoder`. @@ -375,7 +438,9 @@ def numpy_encode(obj, primitives=False, properties=None): :param primitives: If True, arrays are serialized as (nested) lists without meta info. 
""" - from numpy import ndarray, generic + from numpy import ndarray, generic, datetime64 + + scalar_types = (generic, datetime64) if isinstance(obj, ndarray): if primitives: @@ -391,7 +456,7 @@ def numpy_encode(obj, primitives=False, properties=None): numpy_encode._warned_compact = True warnings.warn('storing ndarray in text format while compression in enabled; in the next major version ' 'of json_tricks, the default when using compression will change to compact mode; to already use ' - 'that smaller format, pass `properties={"ndarray_compact": True}` to json_tricks.dump; ' + 'that smaller format, pass `properties={"ndarray_compact": True}` to ro_json.dump; ' 'to silence this warning, pass `properties={"ndarray_compact": False}`; ' 'see issue https://github.com/mverleg/pyjson_tricks/issues/73', JsonTricksDeprecation) # Property 'use_compact' may also be an integer, in which case it's the number of @@ -407,17 +472,19 @@ def numpy_encode(obj, primitives=False, properties=None): ('__ndarray__', data_json), ('dtype', str(obj.dtype)), ('shape', obj.shape), + ('0dim', obj.ndim == 0), )) if len(obj.shape) > 1: dct['Corder'] = obj.flags['C_CONTIGUOUS'] if use_compact and store_endianness != 'suppress': dct['endian'] = store_endianness or sys.byteorder return dct - elif isinstance(obj, generic): - if NumpyEncoder.SHOW_SCALAR_WARNING: - NumpyEncoder.SHOW_SCALAR_WARNING = False - warnings.warn('json-tricks: numpy scalar serialization is experimental and may work differently in future versions') - return obj.item() + elif isinstance(obj, scalar_types): + return hashodict(( + ('__ndarray__', obj.item()), + ('dtype', str(obj.dtype)), + ('0dim', False), + )) return obj @@ -476,3 +543,196 @@ def default(self, obj, *args, **kwargs): warnings.warn('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`', JsonTricksDeprecation) obj = nonumpy_encode(obj) return super(NoNumpyEncoder, self).default(obj, *args, **kwargs) + +def _make_iterencode(markers, _default, _encoder, _indent, 
_floatstr, + _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + ValueError=ValueError, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + str=str, + tuple=tuple, + _intstr=int.__repr__, + ): + + try: + import numpy + def isfloatinstance(obj): + return isinstance(obj, float) and not isinstance(obj, numpy.number) + except ImportError: + def isfloatinstance(obj): + return isinstance(obj, float) + + if _indent is not None and not isinstance(_indent, str): + _indent = ' ' * _indent + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + _indent * _current_indent_level + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, str): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, int): + # Subclasses of int/float may override __repr__, but we still + # want to encode them as integers/floats in JSON. One example + # within the standard library is IntEnum. 
+ yield buf + _intstr(value) + elif isfloatinstance(value): + # see comment above for int + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + yield from chunks + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + _indent * _current_indent_level + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + _indent * _current_indent_level + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = sorted(dct.items()) + else: + items = dct.items() + for key, value in items: + if isinstance(key, str): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. 
+ elif isinstance(key, float): + # see comment for int/float in _make_iterencode + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, int): + # see comment for int/float in _make_iterencode + key = _intstr(key) + elif _skipkeys: + continue + else: + raise TypeError(f'keys must be str, int, float, bool or None, ' + f'not {key.__class__.__name__}') + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, str): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, int): + # see comment for int/float in _make_iterencode + yield _intstr(value) + elif isfloatinstance(value): + # see comment for int/float in _make_iterencode + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + yield from chunks + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + _indent * _current_indent_level + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, str): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, int): + # see comment for int/float in _make_iterencode + yield _intstr(o) + elif isfloatinstance(o): + # see comment for int/float in _make_iterencode + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + yield from _iterencode_list(o, _current_indent_level) + elif isinstance(o, dict): + yield from _iterencode_dict(o, _current_indent_level) + else: + if markers is not None: + markerid = id(o) + if 
markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + yield from _iterencode(o, _current_indent_level) + if markers is not None: + del markers[markerid] + return _iterencode diff --git a/json_tricks/nonp.py b/ro_json/nonp.py similarity index 96% rename from json_tricks/nonp.py rename to ro_json/nonp.py index d00d754..1dcef83 100644 --- a/json_tricks/nonp.py +++ b/ro_json/nonp.py @@ -3,20 +3,20 @@ from os import fsync from sys import exc_info -from json_tricks.utils import is_py3, dict_default, gzip_compress, gzip_decompress, JsonTricksDeprecation +from ro_json.utils import is_py3, dict_default, gzip_compress, gzip_decompress, JsonTricksDeprecation from .utils import str_type, NoNumpyException # keep 'unused' imports from .comment import strip_comments # keep 'unused' imports #TODO @mark: imports removed? from .encoders import TricksEncoder, json_date_time_encode, \ class_instance_encode, json_complex_encode, json_set_encode, numeric_types_encode, numpy_encode, \ nonumpy_encode, nopandas_encode, pandas_encode, noenum_instance_encode, \ - enum_instance_encode, pathlib_encode, bytes_encode, slice_encode # keep 'unused' imports + enum_instance_encode, pathlib_encode, bytes_encode, slice_encode, range_encode # keep 'unused' imports from .decoders import TricksPairHook, \ json_date_time_hook, ClassInstanceHook, \ json_complex_hook, json_set_hook, numeric_types_hook, json_numpy_obj_hook, \ json_nonumpy_obj_hook, \ nopandas_hook, pandas_hook, EnumInstanceHook, \ - noenum_hook, pathlib_hook, nopathlib_hook, json_bytes_hook, slice_hook # keep 'unused' imports + noenum_hook, pathlib_hook, nopathlib_hook, json_bytes_hook, slice_hook, range_hook # keep 'unused' imports ENCODING = 'UTF-8' @@ -33,6 +33,7 @@ class_instance_encode, bytes_encode, slice_encode, + range_encode, ] DEFAULT_HOOKS = [ @@ -43,6 +44,7 @@ _cih_instance, json_bytes_hook, slice_hook, + range_hook, ] @@ -111,7 +113,7 @@ def dumps(obj, sort_keys=None, 
cls=None, obj_encoders=DEFAULT_ENCODERS, extra_ob Other arguments are passed on to `cls`. Note that `sort_keys` should be false if you want to preserve order. """ if not hasattr(extra_obj_encoders, '__iter__'): - raise TypeError('`extra_obj_encoders` should be a tuple in `json_tricks.dump(s)`') + raise TypeError('`extra_obj_encoders` should be a tuple in `ro_json.dump(s)`') encoders = tuple(extra_obj_encoders) + tuple(obj_encoders) properties = properties or {} dict_default(properties, 'primitives', primitives) @@ -217,7 +219,7 @@ def loads(string, preserve_order=True, ignore_comments=None, decompression=None, Other arguments are passed on to json_func. """ if not hasattr(extra_obj_pairs_hooks, '__iter__'): - raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `json_tricks.load(s)`') + raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `ro_json.load(s)`') if decompression is None: decompression = isinstance(string, bytes) and string[:2] == b'\x1f\x8b' if decompression: @@ -226,7 +228,7 @@ def loads(string, preserve_order=True, ignore_comments=None, decompression=None, if conv_str_byte: string = string.decode(ENCODING) else: - raise TypeError(('The input was of non-string type "{0:}" in `json_tricks.load(s)`. ' + raise TypeError(('The input was of non-string type "{0:}" in `ro_json.load(s)`. ' 'Bytes cannot be automatically decoding since the encoding is not known. Recommended ' 'way is to instead encode the bytes to a string and pass that string to `load(s)`, ' 'for example bytevar.encode("utf-8") if utf-8 is the encoding. 
Alternatively you can ' @@ -248,7 +250,7 @@ def loads(string, preserve_order=True, ignore_comments=None, decompression=None, # if this fails, re-try parsing after stripping comments result = _strip_loads(string, hook, True, **jsonkwargs) if not getattr(loads, '_ignore_comments_warned', False): - warnings.warn('`json_tricks.load(s)` stripped some comments, but `ignore_comments` was ' + warnings.warn('`ro_json.load(s)` stripped some comments, but `ignore_comments` was ' 'not passed; in the next major release, the behaviour when `ignore_comments` is not ' 'passed will change; it is recommended to explicitly pass `ignore_comments=True` if ' 'you want to strip comments; see https://github.com/mverleg/pyjson_tricks/issues/74', diff --git a/json_tricks/np.py b/ro_json/np.py similarity index 82% rename from json_tricks/np.py rename to ro_json/np.py index 5f269c9..1fbec8a 100644 --- a/json_tricks/np.py +++ b/ro_json/np.py @@ -16,10 +16,10 @@ import numpy except ImportError: raise NoNumpyException('Could not load numpy, maybe it is not installed? 
If you do not want to use numpy encoding ' - 'or decoding, you can import the functions from json_tricks.nonp instead, which do not need numpy.') + 'or decoding, you can import the functions from ro_json.nonp instead, which do not need numpy.') -warnings.warn('`json_tricks.np` is deprecated, you can import directly from `json_tricks`', JsonTricksDeprecation) +warnings.warn('`ro_json.np` is deprecated, you can import directly from `json_tricks`', JsonTricksDeprecation) DEFAULT_NP_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS # DEPRECATED diff --git a/json_tricks/np_utils.py b/ro_json/np_utils.py similarity index 100% rename from json_tricks/np_utils.py rename to ro_json/np_utils.py diff --git a/json_tricks/utils.py b/ro_json/utils.py similarity index 99% rename from json_tricks/utils.py rename to ro_json/utils.py index bf8a9dc..ca68a74 100644 --- a/json_tricks/utils.py +++ b/ro_json/utils.py @@ -120,6 +120,7 @@ def get_scalar_repr(npscalar): ('__ndarray__', npscalar.item()), ('dtype', str(npscalar.dtype)), ('shape', ()), + ('0dim', False), )) @@ -152,7 +153,7 @@ def encode_intenums_inplace(obj): by json-tricks. This happens in-place (the object is changed, use a copy). 
""" from enum import IntEnum - from json_tricks import encoders + from ro_json import encoders if isinstance(obj, IntEnum): return encoders.enum_instance_encode(obj) if isinstance(obj, dict): diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index befa856..0000000 --- a/setup.cfg +++ /dev/null @@ -1,5 +0,0 @@ -[bdist_wheel] -universal = 1 -[metadata] -description-file = README.rst -license_file = LICENSE.txt diff --git a/setup.py b/setup.py index 32b2c45..5abd125 100644 --- a/setup.py +++ b/setup.py @@ -1,69 +1,72 @@ -# -*- coding: utf-8 -*- +from setuptools import setup, find_packages -from sys import version_info -import warnings +with open('README.md', 'r', encoding='utf-8') as fh: + readme = fh.read() -from setuptools import setup +def get_version_and_cmdclass(pkg_path): + """Load version.py module without importing the whole package. -with open('README.md', 'r') as fh: - readme = fh.read() + Template code from miniver + """ + import os + from importlib.util import module_from_spec, spec_from_file_location -# with open('json_tricks/_version.py', 'r') as fh: -# version = fh.read().strip() -from json_tricks._version import VERSION + spec = spec_from_file_location("version", os.path.join(pkg_path, "_version.py")) + module = module_from_spec(spec) + spec.loader.exec_module(module) + return module.__version__, module.get_cmdclass(pkg_path) -requires = [] -if version_info < (2, 7, 0): - requires.append('ordereddict') -if (version_info[0] == 2 and version_info[1] < 7) or \ - (version_info[0] == 3 and version_info[1] < 4) or \ - version_info[0] not in (2, 3): - raise warnings.warn('`json_tricks` does not support Python version {}.{}' - .format(version_info[0], version_info[1])) +version, cmdclass = get_version_and_cmdclass("ro_json") setup( - name='json_tricks', - description='Extra features for Python\'s JSON: comments, order, numpy, ' - 'pandas, datetimes, and many more! 
Simple but customizable.', - long_description_content_type='text/markdown', - long_description=readme, - url='https://github.com/mverleg/pyjson_tricks', - author='Mark V', - maintainer='Mark V', - author_email='markv.nl.dev@gmail.com', - license='Revised BSD License (LICENSE.txt)', - keywords=['json', 'numpy', 'OrderedDict', 'comments', 'pandas', 'pytz', - 'enum', 'encode', 'decode', 'serialize', 'deserialize'], - version=VERSION, - packages=['json_tricks'], - package_data=dict( - json_tricks=['LICENSE.txt', 'README.md', 'VERSION'], - # tests=['tests/*.py'], - ), - # include_package_data=True, - zip_safe=True, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Development Status :: 6 - Mature', - 'Intended Audience :: Developers', - 'Natural Language :: English', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Topic :: Software Development :: Libraries :: Python Modules', - # 'Topic :: Utilities', - ], - install_requires=requires, + name='ro-json', + version=version, + cmdclass=cmdclass, + description='Extra features for Python\'s JSON: comments, order, numpy, ' + 'pandas, datetimes, and many more! 
Simple but customizable.', + long_description=readme, + long_description_content_type='text/markdown', + url='https://github.com/ramonaoptics/ro-json', + author='Clay Dugo', + author_email='clay@ramonaoptics.com', + license='BSD-3-Clause', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Natural Language :: English', + 'License :: OSI Approved :: BSD License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + keywords=[ + 'json', + 'numpy', + 'OrderedDict', + 'comments', + 'pandas', + 'pytz', + 'enum', + 'encode', + 'decode', + 'serialize', + 'deserialize', + 'roundtrip', + ], + packages=find_packages(exclude=["tests*"]), + include_package_data=True, + install_requires=[], + python_requires='>=3.10', + project_urls={ + # 'Documentation': 'https://your-package-docs-url', + 'Source': 'https://github.com/ramonaoptics/ro_json', + 'Tracker': 'https://github.com/ramonaoptics/ro_json/issues', + }, + license_files=('LICENSE.txt',), ) diff --git a/tests/test_bare.py b/tests/test_bare.py index a3c67c9..ea09d74 100644 --- a/tests/test_bare.py +++ b/tests/test_bare.py @@ -15,10 +15,10 @@ import pytest from pytest import raises, fail, warns -from json_tricks import fallback_ignore_unknown, DuplicateJsonKeyException -from json_tricks.nonp import strip_comments, dump, dumps, load, loads, \ +from ro_json import fallback_ignore_unknown, DuplicateJsonKeyException +from ro_json.nonp import strip_comments, dump, dumps, load, loads, \ ENCODING -from json_tricks.utils import is_py3, gzip_compress, JsonTricksDeprecation, str_type +from ro_json.utils import is_py3, 
gzip_compress, JsonTricksDeprecation, str_type from .test_class import MyTestCls, CustomEncodeCls, SubClass, SuperClass, SlotsBase, SlotsDictABC, SlotsStr, \ SlotsABCDict, SlotsABC diff --git a/tests/test_class.py b/tests/test_class.py index 590a8c9..692413b 100644 --- a/tests/test_class.py +++ b/tests/test_class.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- import weakref -from json_tricks import dumps, loads +from ro_json import dumps, loads class MyTestCls(object): diff --git a/tests/test_enum.py b/tests/test_enum.py index fa0aea6..4d2cd55 100644 --- a/tests/test_enum.py +++ b/tests/test_enum.py @@ -5,8 +5,8 @@ from datetime import datetime from functools import partial from enum import Enum, IntEnum -from json_tricks import dumps, loads, encode_intenums_inplace -from json_tricks.encoders import enum_instance_encode +from ro_json import dumps, loads, encode_intenums_inplace +from ro_json.encoders import enum_instance_encode PY2 = sys.version_info[0] == 2 diff --git a/tests/test_meta.py b/tests/test_meta.py index 89d2794..1c6043d 100644 --- a/tests/test_meta.py +++ b/tests/test_meta.py @@ -1,7 +1,12 @@ - +import ro_json import re def test_version(): - import json_tricks - assert re.match(r'^\d+\.\d+\.\d+$', json_tricks.__version__) is not None + # The version shall be compatible with + # packaging.version.Version + # and enable comparison + assert re.match(r'^\d+\.\d+\.\d+.*$', '1.2.3') is not None + assert re.match(r'^\d+\.\d+\.\d+.*$', '1.2.3.post1') is not None + assert re.match(r'^\d+\.\d+\.\d+.*$', '1.2.3.post13+g7cb3d69.dirty') is not None + assert re.match(r'^\d+\.\d+\.\d+.*$', ro_json.__version__) is not None diff --git a/tests/test_np.py b/tests/test_np.py index 4e28393..031c019 100644 --- a/tests/test_np.py +++ b/tests/test_np.py @@ -7,17 +7,20 @@ import sys from warnings import catch_warnings, simplefilter -from pytest import warns +from _pytest.recwarn import warns +from datetime import datetime, timezone + from numpy import arange, ones, array, 
array_equal, finfo, iinfo, pi from numpy import int8, int16, int32, int64, uint8, uint16, uint32, uint64, \ - float16, float32, float64, complex64, complex128, zeros, ndindex + float16, float32, float64, complex64, complex128, zeros, ndindex, \ + datetime64 from numpy.core.umath import exp from numpy.testing import assert_equal -from json_tricks import numpy_encode -from json_tricks.np import dump, dumps, load, loads -from json_tricks.np_utils import encode_scalars_inplace -from json_tricks.utils import JsonTricksDeprecation, gzip_decompress +from ro_json import numpy_encode +from ro_json.np import dump, dumps, load, loads +from ro_json.np_utils import encode_scalars_inplace +from ro_json.utils import JsonTricksDeprecation, gzip_decompress from .test_bare import cls_instance from .test_class import MyTestCls @@ -119,20 +122,6 @@ def test_memory_order(): arrF.flags['F_CONTIGUOUS'] == arr.flags['F_CONTIGUOUS'] -def test_scalars_types(): - # from: https://docs.scipy.org/doc/numpy/user/basics.types.html - encme = [] - for dtype in DTYPES: - for val in (dtype(0),) + get_lims(dtype): - assert isinstance(val, dtype) - encme.append(val) - json = dumps(encme, indent=2) - rec = loads(json) - assert encme == rec - for nr in rec: - assert nr.__class__ in (int, float, complex), 'primitive python type expected, see issue #18' - - def test_array_types(): # from: https://docs.scipy.org/doc/numpy/user/basics.types.html # see also `test_scalars_types` @@ -181,6 +170,23 @@ def test_dump_np_scalars(): assert data[2][3] == rec[2][3] assert data[2] == tuple(rec[2]) + json_tricks_3_17_3_encoded = '[' \ + '{"__ndarray__": -27, "dtype": "int8", "shape": []}, '\ + '{"__ndarray__": {"__complex__": [2.7182817459106445, 37.0]}, "dtype": "complex64", "shape": []}, ' \ + '[{"alpha": {"__ndarray__": -22026.465794806718, "dtype": "float64", "shape": []}, ' \ + '"str-only": {"__ndarray__": {"__complex__": [-1.0, -1.0]}, "dtype": "complex64", "shape": []}}, ' \ + '{"__ndarray__": 123456789, "dtype": 
"uint32", "shape": []}, ' \ + '{"__ndarray__": 0.367919921875, "dtype": "float16", "shape": []}, ' \ + '{"__set__": [{"__ndarray__": 37, "dtype": "int64", "shape": []}, ' \ + '{"__ndarray__": 0, "dtype": "uint64", "shape": []}]}]]' + rec = loads(json_tricks_3_17_3_encoded) + assert data[0] == rec[0] + assert data[1] == rec[1] + assert data[2][0] == rec[2][0] + assert data[2][1] == rec[2][1] + assert data[2][2] == rec[2][2] + assert data[2][3] == rec[2][3] + assert data[2] == tuple(rec[2]) def test_ndarray_object_nesting(): # Based on issue 53 @@ -223,8 +229,8 @@ def test_compact_mode_unspecified(): gz_json_2 = dumps(data, compression=True) assert gz_json_1 == gz_json_2 json = gzip_decompress(gz_json_1).decode('ascii') - assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \ - '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' + assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": true}, ' \ + '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2], "0dim": false}]' def test_compact(): @@ -238,8 +244,8 @@ def test_encode_disable_compact(): data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=False)) json = gzip_decompress(gz_json).decode('ascii') - assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \ - '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' + assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": true}, ' \ + '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2], "0dim": 
false}]' def test_encode_enable_compact_little_endian(): @@ -247,9 +253,9 @@ def test_encode_enable_compact_little_endian(): gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) json = gzip_decompress(gz_json).decode('ascii') assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ - 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": ' \ 'true, "endian": "little"}, {"__ndarray__": "b64:GC1EVPshCUBpVxSLCr8FQA==", "dtype": "float64", ' \ - '"shape": [2], "endian": "little"}]' + '"shape": [2], "0dim": false, "endian": "little"}]' def test_encode_enable_compact_big_endian(): @@ -257,8 +263,8 @@ def test_encode_enable_compact_big_endian(): gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='big')) json = gzip_decompress(gz_json).decode('ascii') assert json == '{"__ndarray__": "b64:P/AAAAAAAABAAAAAAAAAAEAIAAAAAAAAQBAAAAAAAABAFAAAAAAAAEAYAA' \ - 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ - 'true, "endian": "big"}' + 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "0dim": false, ' \ + '"Corder": true, "endian": "big"}' def test_encode_enable_compact_native_endian(): @@ -267,11 +273,11 @@ def test_encode_enable_compact_native_endian(): json = gzip_decompress(gz_json).decode('ascii') if sys.byteorder == 'little': assert json == '{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ - 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": ' \ 'true, "endian": "little"}' elif sys.byteorder == 'big': assert json == '{"__ndarray__": 
"b64:P/AAAAAAAABAAAAAAAAAAEAIAAAAAAAAQBAAAAAAAABAFAAAAAAAAEAYAA' \ - 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": ' \ 'true, "endian": "big"}' else: raise Exception("unknown system endianness '{}'".format(sys.byteorder)) @@ -289,9 +295,9 @@ def test_encode_compact_cutoff(): gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=5, ndarray_store_byteorder='little')) json = gzip_decompress(gz_json).decode('ascii') assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ - 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": ' \ 'true, "endian": "little"}, {"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", ' \ - '"shape": [2]}]' + '"shape": [2], "0dim": false}]' def test_encode_compact_inline_compression(): @@ -299,7 +305,7 @@ def test_encode_compact_inline_compression(): json = dumps(data, compression=False, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) assert 'b64.gz:' in json, 'If the overall file is not compressed and there are significant savings, then do inline gzip compression.' 
assert json == '[{"__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgAIEP9gwQ4AChOKC0AJQWgdISUFoGSitAaSUorQKl1aC0BpTWgtI6UFoPShs4AABmfqWAgAAAAA==", ' \ - '"dtype": "float64", "shape": [4, 4], "Corder": true, "endian": "little"}]' + '"dtype": "float64", "shape": [4, 4], "0dim": false, "Corder": true, "endian": "little"}]' def test_encode_compact_no_inline_compression(): @@ -307,7 +313,7 @@ def test_encode_compact_no_inline_compression(): json = dumps(data, compression=False, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) assert 'b64.gz:' not in json, 'If the overall file is not compressed, but there are no significant savings, then do not do inline compression.' assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEA=", ' \ - '"dtype": "float64", "shape": [2, 2], "Corder": true, "endian": "little"}]' + '"dtype": "float64", "shape": [2, 2], "0dim": false, "Corder": true, "endian": "little"}]' def test_decode_compact_mixed_compactness(): @@ -369,17 +375,109 @@ def test_empty(): assert_equal(loads(json), data, 'shape = {} ; json = {}'.format(data.shape, json)) def test_decode_writeable(): - # issue https://github.com/mverleg/pyjson_tricks/issues/90 - data = zeros((2, 2)) + # issue https://github.com/mverleg/pyjson_tricks/issues/90 + data = zeros((2, 2)) + + data_uncompressed = dumps(data) + data_compressed = dumps(data, properties={'ndarray_compact': True}) + + reloaded_uncompressed = loads(data_uncompressed) + reloaded_compressed = loads(data_compressed) + + assert array_equal(data, reloaded_uncompressed) + assert array_equal(data, reloaded_compressed) + + assert reloaded_uncompressed.flags.writeable + assert reloaded_compressed.flags.writeable + + +def test_0_dimensional_array_roundtrip(): + to_dump = zeros((), dtype='uint32') + to_dump[...] 
= 123 + + the_dumps = dumps(to_dump) + loaded = loads(the_dumps) + assert loaded == to_dump + + the_double_dumps = dumps(loaded) + assert the_dumps == the_double_dumps + + +def test_0_dimensional_array_roundtrip_object(): + the_set = set([1, 2, 3]) + + # We are putting it an object in a numpy array. this should serialize correctly + to_dump = zeros((), dtype=object) + to_dump[...] = the_set + + the_dumps = dumps(to_dump) + the_load = loads(the_dumps) + the_double_dumps = dumps(the_load) + + assert the_dumps == the_double_dumps + + assert isinstance(the_load[()], set) + assert the_set == the_load[()] + + +def test_scalar_roundtrip(): + to_dump = [ + uint8(1), + uint16(2), + uint32(3), + uint64(4), + int8(1), + int16(2), + int32(3), + int64(4), + float32(1), + float64(2), + ] + + the_dumps = dumps(to_dump) + the_load = loads(the_dumps) + + for original, read in zip(to_dump, the_load): + assert original == read + assert original.__class__ == read.__class__ + + the_double_dumps = dumps(loads(dumps(to_dump))) + + assert the_dumps == the_double_dumps + + +def test_round_trip_datetime64_scalars(): + now_utc = datetime.now(timezone.utc).replace(tzinfo=None) + now_M = datetime64(now_utc, 'M') + now_D = datetime64(now_utc, 'D') + now_h = datetime64(now_utc, 'h') + now_m = datetime64(now_utc, 'm') + now_s = datetime64(now_utc, 's') + now_ms = datetime64(now_utc, 'ms') + now_us = datetime64(now_utc, 'us') + now_ns = datetime64(now_utc, 'ns') + + to_dump = [ + now_M, + now_D, + now_h, + now_m, + now_s, + now_ms, + now_us, + now_ns, + now_us, + now_ns, + ] - data_uncompressed = dumps(data) - data_compressed = dumps(data, properties={'ndarray_compact': True}) + the_dumps = dumps(to_dump) + the_load = loads(the_dumps) - reloaded_uncompressed = loads(data_uncompressed) - reloaded_compressed = loads(data_compressed) + for original, read in zip(to_dump, the_load): + assert original == read + assert original.__class__ == read.__class__ + assert original.dtype == read.dtype - assert 
array_equal(data, reloaded_uncompressed) - assert array_equal(data, reloaded_compressed) + the_double_dumps = dumps(loads(dumps(to_dump))) - assert reloaded_uncompressed.flags.writeable - assert reloaded_compressed.flags.writeable + assert the_dumps == the_double_dumps diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 5c3d19c..8eb5ee0 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -5,7 +5,7 @@ from numpy import linspace, isnan from numpy.testing import assert_equal from pandas import DataFrame, Series -from json_tricks import dumps, loads +from ro_json import dumps, loads from tests.test_bare import nonpdata diff --git a/tests/test_pathlib.py b/tests/test_pathlib.py index fb0dbbf..f928e82 100644 --- a/tests/test_pathlib.py +++ b/tests/test_pathlib.py @@ -7,7 +7,7 @@ from pathlib import Path -from json_tricks import dumps, loads +from ro_json import dumps, loads # These paths are not necessarily actual paths that exist, but are sufficient diff --git a/tests/test_range.py b/tests/test_range.py new file mode 100644 index 0000000..6b8c22c --- /dev/null +++ b/tests/test_range.py @@ -0,0 +1,13 @@ +from ro_json import dumps, loads + +def test_range(): + original_range = range(0, 10, 2) + json_range = dumps(original_range) + loaded_range = loads(json_range) + assert original_range == loaded_range + +def test_range_no_step(): + original_range = range(0, 5) + json_range = dumps(original_range) + loaded_range = loads(json_range) + assert original_range == loaded_range diff --git a/tests/test_slice.py b/tests/test_slice.py index 9ded28f..1cf6d00 100644 --- a/tests/test_slice.py +++ b/tests/test_slice.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from pathlib import Path -from json_tricks import dumps, loads +from ro_json import dumps, loads def test_slice(): original_slice = slice(0, 10, 2) diff --git a/tests/test_tz.py b/tests/test_tz.py index 0f36a51..cd9bc84 100644 --- a/tests/test_tz.py +++ b/tests/test_tz.py @@ -7,8 +7,8 @@ """ from datetime 
import datetime, date, time, timedelta, timezone -from json_tricks import dumps, loads -from json_tricks.utils import is_py3 +from ro_json import dumps, loads +from ro_json.utils import is_py3 import pytz diff --git a/tests/test_utils.py b/tests/test_utils.py index e53cc8c..0bba894 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from json_tricks.utils import hashodict, get_arg_names, nested_index +from ro_json.utils import hashodict, get_arg_names, nested_index def test_hashodict():