From 38aa751bf7ddeee1205581e8d142af223527c9b1 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Mon, 1 Jan 2024 12:04:46 -0500 Subject: [PATCH 01/16] Enable different serialization for 0dim arrays and datetime64 Get encode in place to work Remove test scalar types test --- json_tricks/decoders.py | 46 ++++++++--- json_tricks/encoders.py | 16 ++-- json_tricks/utils.py | 1 + tests/test_np.py | 178 +++++++++++++++++++++++++++++++--------- 4 files changed, 184 insertions(+), 57 deletions(-) diff --git a/json_tricks/decoders.py b/json_tricks/decoders.py index 63aa090..67336a1 100644 --- a/json_tricks/decoders.py +++ b/json_tricks/decoders.py @@ -275,8 +275,16 @@ def json_numpy_obj_hook(dct): """ if not isinstance(dct, dict): return dct - if not '__ndarray__' in dct: + if '__ndarray__' not in dct: return dct + if 'shape' not in dct or (dct['shape'] == [] and not dct.get('0dim', False)): + # New style scalar encoding + return _decode_numpy_scalar(dct) + else: + return _decode_ndarray(dct) + + +def _decode_ndarray(dct): try: import numpy except ImportError: @@ -297,7 +305,32 @@ def json_numpy_obj_hook(dct): else: return _lists_of_numbers_to_ndarray(data_json, order, shape, nptype) else: - return _scalar_to_numpy(data_json, nptype) + # This code path is mostly for 0-dimensional arrays + # numpy scalars are separately decoded + return numpy.asarray( + data_json, + dtype=nptype + ).reshape(dct['shape']) + + +def _decode_numpy_scalar(dct): + try: + import numpy + except ImportError: + raise NoNumpyException('Trying to decode a map which appears to represent a numpy ' + 'scalar, but numpy appears not to be installed.') + + # numpy.asarray will handle dtypes with units well (such as datetime64) + arr = numpy.asarray(dct['__ndarray__'], dtype=dct['dtype']) + + # https://numpy.org/doc/stable/reference/arrays.scalars.html#indexing + # https://numpy.org/doc/stable/user/basics.indexing.html#detailed-notes + # > An empty (tuple) index is a full scalar index into a zero-dimensional + # array. x[()] returns a scalar if x is zero-dimensional and a view + # otherwise. On the other hand, x[...] always returns a view. + + scalar = arr[()] + return scalar def _bin_str_to_ndarray(data, order, shape, np_type_name, data_endianness): @@ -354,15 +387,6 @@ def _lists_of_obj_to_ndarray(data, order, shape, dtype): return arr -def _scalar_to_numpy(data, dtype): - """ - From scalar value to numpy type. - """ - import numpy as nptypes - dtype = getattr(nptypes, dtype) - return dtype(data) - - def json_nonumpy_obj_hook(dct): """ This hook has no effect except to check if you're trying to decode numpy arrays without support, and give you a useful message. diff --git a/json_tricks/encoders.py b/json_tricks/encoders.py index c5c3213..141bb2d 100644 --- a/json_tricks/encoders.py +++ b/json_tricks/encoders.py @@ -375,7 +375,9 @@ def numpy_encode(obj, primitives=False, properties=None): :param primitives: If True, arrays are serialized as (nested) lists without meta info. 
""" - from numpy import ndarray, generic + from numpy import ndarray, generic, datetime64 + + scalar_types = (generic, datetime64) if isinstance(obj, ndarray): if primitives: @@ -407,17 +409,19 @@ def numpy_encode(obj, primitives=False, properties=None): ('__ndarray__', data_json), ('dtype', str(obj.dtype)), ('shape', obj.shape), + ('0dim', obj.ndim == 0), )) if len(obj.shape) > 1: dct['Corder'] = obj.flags['C_CONTIGUOUS'] if use_compact and store_endianness != 'suppress': dct['endian'] = store_endianness or sys.byteorder return dct - elif isinstance(obj, generic): - if NumpyEncoder.SHOW_SCALAR_WARNING: - NumpyEncoder.SHOW_SCALAR_WARNING = False - warnings.warn('json-tricks: numpy scalar serialization is experimental and may work differently in future versions') - return obj.item() + elif isinstance(obj, scalar_types): + return hashodict(( + ('__ndarray__', obj.item()), + ('dtype', str(obj.dtype)), + ('0dim', False), + )) return obj diff --git a/json_tricks/utils.py b/json_tricks/utils.py index bf8a9dc..b6e08b8 100644 --- a/json_tricks/utils.py +++ b/json_tricks/utils.py @@ -120,6 +120,7 @@ def get_scalar_repr(npscalar): ('__ndarray__', npscalar.item()), ('dtype', str(npscalar.dtype)), ('shape', ()), + ('0dim', False), )) diff --git a/tests/test_np.py b/tests/test_np.py index 4e28393..675ee0e 100644 --- a/tests/test_np.py +++ b/tests/test_np.py @@ -7,10 +7,13 @@ import sys from warnings import catch_warnings, simplefilter -from pytest import warns +from _pytest.recwarn import warns +from datetime import datetime, timezone + from numpy import arange, ones, array, array_equal, finfo, iinfo, pi from numpy import int8, int16, int32, int64, uint8, uint16, uint32, uint64, \ - float16, float32, float64, complex64, complex128, zeros, ndindex + float16, float32, float64, complex64, complex128, zeros, ndindex, \ + datetime64 from numpy.core.umath import exp from numpy.testing import assert_equal @@ -119,20 +122,6 @@ def test_memory_order(): arrF.flags['F_CONTIGUOUS'] == arr.flags['F_CONTIGUOUS'] -def test_scalars_types(): - # from: https://docs.scipy.org/doc/numpy/user/basics.types.html - encme = [] - for dtype in DTYPES: - for val in (dtype(0),) + get_lims(dtype): - assert isinstance(val, dtype) - encme.append(val) - json = dumps(encme, indent=2) - rec = loads(json) - assert encme == rec - for nr in rec: - assert nr.__class__ in (int, float, complex), 'primitive python type expected, see issue #18' - - def test_array_types(): # from: https://docs.scipy.org/doc/numpy/user/basics.types.html # see also `test_scalars_types` @@ -181,6 +170,23 @@ def test_dump_np_scalars(): assert data[2][3] == rec[2][3] assert data[2] == tuple(rec[2]) + json_tricks_3_17_3_encoded = '[' \ + '{"__ndarray__": -27, "dtype": "int8", "shape": []}, '\ + '{"__ndarray__": {"__complex__": [2.7182817459106445, 37.0]}, "dtype": "complex64", "shape": []}, ' \ + '[{"alpha": {"__ndarray__": -22026.465794806718, "dtype": "float64", "shape": []}, ' \ + '"str-only": {"__ndarray__": {"__complex__": [-1.0, -1.0]}, "dtype": "complex64", "shape": []}}, ' \ + '{"__ndarray__": 123456789, "dtype": "uint32", "shape": []}, ' \ + '{"__ndarray__": 0.367919921875, "dtype": "float16", "shape": []}, ' \ + '{"__set__": [{"__ndarray__": 37, "dtype": "int64", "shape": []}, ' \ + '{"__ndarray__": 0, "dtype": "uint64", "shape": []}]}]]' + rec = loads(json_tricks_3_17_3_encoded) + assert data[0] == rec[0] + assert data[1] == rec[1] + assert data[2][0] == rec[2][0] + assert data[2][1] == rec[2][1] + assert data[2][2] == rec[2][2] + assert data[2][3] == 
rec[2][3] + assert data[2] == tuple(rec[2]) def test_ndarray_object_nesting(): # Based on issue 53 @@ -223,8 +229,8 @@ def test_compact_mode_unspecified(): gz_json_2 = dumps(data, compression=True) assert gz_json_1 == gz_json_2 json = gzip_decompress(gz_json_1).decode('ascii') - assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \ - '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' + assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": true}, ' \ + '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2], "0dim": false}]' def test_compact(): @@ -238,8 +244,8 @@ def test_encode_disable_compact(): data = [array([[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]]), array([pi, exp(1)])] gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=False)) json = gzip_decompress(gz_json).decode('ascii') - assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "Corder": true}, ' \ - '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2]}]' + assert json == '[{"__ndarray__": [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0]], "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": true}, ' \ + '{"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", "shape": [2], "0dim": false}]' def test_encode_enable_compact_little_endian(): @@ -247,9 +253,9 @@ def test_encode_enable_compact_little_endian(): gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) json = gzip_decompress(gz_json).decode('ascii') assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ - 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": ' \ 'true, "endian": "little"}, {"__ndarray__": "b64:GC1EVPshCUBpVxSLCr8FQA==", "dtype": "float64", ' \ - '"shape": [2], "endian": "little"}]' + '"shape": [2], "0dim": false, "endian": "little"}]' def test_encode_enable_compact_big_endian(): @@ -257,8 +263,8 @@ def test_encode_enable_compact_big_endian(): gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=True, ndarray_store_byteorder='big')) json = gzip_decompress(gz_json).decode('ascii') assert json == '{"__ndarray__": "b64:P/AAAAAAAABAAAAAAAAAAEAIAAAAAAAAQBAAAAAAAABAFAAAAAAAAEAYAA' \ - 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ - 'true, "endian": "big"}' + 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "0dim": false, ' \ + '"Corder": true, "endian": "big"}' def test_encode_enable_compact_native_endian(): @@ -267,11 +273,11 @@ def test_encode_enable_compact_native_endian(): json = gzip_decompress(gz_json).decode('ascii') if sys.byteorder == 'little': assert json == '{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ - 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": ' \ 'true, "endian": "little"}' elif sys.byteorder == 'big': assert json == '{"__ndarray__": 
"b64:P/AAAAAAAABAAAAAAAAAAEAIAAAAAAAAQBAAAAAAAABAFAAAAAAAAEAYAA' \ - 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'AAAAAAQBwAAAAAAABAIAAAAAAAAA==", "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": ' \ 'true, "endian": "big"}' else: raise Exception("unknown system endianness '{}'".format(sys.byteorder)) @@ -289,9 +295,9 @@ def test_encode_compact_cutoff(): gz_json = dumps(data, compression=True, properties=dict(ndarray_compact=5, ndarray_store_byteorder='little')) json = gzip_decompress(gz_json).decode('ascii') assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEAAAAAAAAA' \ - 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "Corder": ' \ + 'UQAAAAAAAABhAAAAAAAAAHEAAAAAAAAAgQA==", "dtype": "float64", "shape": [2, 4], "0dim": false, "Corder": ' \ 'true, "endian": "little"}, {"__ndarray__": [3.141592653589793, 2.718281828459045], "dtype": "float64", ' \ - '"shape": [2]}]' + '"shape": [2], "0dim": false}]' def test_encode_compact_inline_compression(): @@ -299,7 +305,7 @@ def test_encode_compact_inline_compression(): json = dumps(data, compression=False, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) assert 'b64.gz:' in json, 'If the overall file is not compressed and there are significant savings, then do inline gzip compression.' assert json == '[{"__ndarray__": "b64.gz:H4sIAAAAAAAC/2NgAIEP9gwQ4AChOKC0AJQWgdISUFoGSitAaSUorQKl1aC0BpTWgtI6UFoPShs4AABmfqWAgAAAAA==", ' \ - '"dtype": "float64", "shape": [4, 4], "Corder": true, "endian": "little"}]' + '"dtype": "float64", "shape": [4, 4], "0dim": false, "Corder": true, "endian": "little"}]' def test_encode_compact_no_inline_compression(): @@ -307,7 +313,7 @@ def test_encode_compact_no_inline_compression(): json = dumps(data, compression=False, properties=dict(ndarray_compact=True, ndarray_store_byteorder='little')) assert 'b64.gz:' not in json, 'If the overall file is not compressed, but there are no significant savings, then do not do inline compression.' assert json == '[{"__ndarray__": "b64:AAAAAAAA8D8AAAAAAAAAQAAAAAAAAAhAAAAAAAAAEEA=", ' \ - '"dtype": "float64", "shape": [2, 2], "Corder": true, "endian": "little"}]' + '"dtype": "float64", "shape": [2, 2], "0dim": false, "Corder": true, "endian": "little"}]' def test_decode_compact_mixed_compactness(): @@ -369,17 +375,109 @@ def test_empty(): assert_equal(loads(json), data, 'shape = {} ; json = {}'.format(data.shape, json)) def test_decode_writeable(): - # issue https://github.com/mverleg/pyjson_tricks/issues/90 - data = zeros((2, 2)) + # issue https://github.com/mverleg/pyjson_tricks/issues/90 + data = zeros((2, 2)) + + data_uncompressed = dumps(data) + data_compressed = dumps(data, properties={'ndarray_compact': True}) + + reloaded_uncompressed = loads(data_uncompressed) + reloaded_compressed = loads(data_compressed) + + assert array_equal(data, reloaded_uncompressed) + assert array_equal(data, reloaded_compressed) + + assert reloaded_uncompressed.flags.writeable + assert reloaded_compressed.flags.writeable + + +def test_0_dimensional_array_roundtrip(): + to_dump = zeros((), dtype='uint32') + to_dump[...] = 123 + + the_dumps = dumps(to_dump) + loaded = loads(the_dumps) + assert loaded == to_dump + + the_double_dumps = dumps(loaded) + assert the_dumps == the_double_dumps + + +def test_0_dimensional_array_roundtrip_object(): + the_set = set([1, 2, 3]) + + # We are putting it an object in a numpy array. 
this should serialize correctly + to_dump = zeros((), dtype=object) + to_dump[...] = the_set + + the_dumps = dumps(to_dump) + the_load = loads(the_dumps) + the_double_dumps = dumps(the_load) + + assert the_dumps == the_double_dumps + + assert isinstance(the_load[()], set) + assert the_set == the_load[()] + + +def test_scalar_roundtrip(): + to_dump = [ + uint8(1), + uint16(2), + uint32(3), + uint64(4), + int8(1), + int16(2), + int32(3), + int64(4), + float32(1), + float64(2), + ] + + the_dumps = dumps(to_dump) + the_load = loads(the_dumps) + + for original, read in zip(to_dump, the_load): + assert original == read + assert original.__class__ == read.__class__ + + the_double_dumps = dumps(loads(dumps(to_dump))) + + assert the_dumps == the_double_dumps + + +def test_round_trip_datetime64_scalars(): + now_utc = datetime.now(timezone.utc).replace(tzinfo=None) + now_M = datetime64(now_utc, 'M') + now_D = datetime64(now_utc, 'D') + now_h = datetime64(now_utc, 'h') + now_m = datetime64(now_utc, 'm') + now_s = datetime64(now_utc, 's') + now_ms = datetime64(now_utc, 'ms') + now_us = datetime64(now_utc, 'us') + now_ns = datetime64(now_utc, 'ns') + + to_dump = [ + now_M, + now_D, + now_h, + now_m, + now_s, + now_ms, + now_us, + now_ns, + now_us, + now_ns, + ] - data_uncompressed = dumps(data) - data_compressed = dumps(data, properties={'ndarray_compact': True}) + the_dumps = dumps(to_dump) + the_load = loads(the_dumps) - reloaded_uncompressed = loads(data_uncompressed) - reloaded_compressed = loads(data_compressed) + for original, read in zip(to_dump, the_load): + assert original == read + assert original.__class__ == read.__class__ + assert original.dtype == read.dtype - assert array_equal(data, reloaded_uncompressed) - assert array_equal(data, reloaded_compressed) + the_double_dumps = dumps(loads(dumps(to_dump))) - assert reloaded_uncompressed.flags.writeable - assert reloaded_compressed.flags.writeable + assert the_dumps == the_double_dumps From 7f235a324ade395670ed6fa3b225c5f9f254e225 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 2 Jan 2024 18:38:11 -0500 Subject: [PATCH 02/16] Modify float check --- json_tricks/encoders.py | 242 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) diff --git a/json_tricks/encoders.py b/json_tricks/encoders.py index 141bb2d..eb38317 100644 --- a/json_tricks/encoders.py +++ b/json_tricks/encoders.py @@ -5,6 +5,7 @@ from fractions import Fraction from functools import wraps from json import JSONEncoder +from json.encoder import encode_basestring_ascii, encode_basestring, INFINITY import sys from .utils import hashodict, get_module_name_from_object, NoEnumException, NoPandasException, \ @@ -81,6 +82,54 @@ def default(self, obj, *args, **kwargs): type(obj), self.__class__.__name__, ', '.join(str(encoder) for encoder in self.obj_encoders))) return obj + def iterencode(self, o, _one_shot=False): + """Encode the given object and yield each string + representation as available. + + For example:: + + for chunk in JSONEncoder().iterencode(bigobject): + mysocket.write(chunk) + + """ + if self.check_circular: + markers = {} + else: + markers = None + if self.ensure_ascii: + _encoder = encode_basestring_ascii + else: + _encoder = encode_basestring + + def floatstr(o, allow_nan=self.allow_nan, + _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY): + # Check for specials. Note that this type of test is processor + # and/or platform-specific, so do tests which don't depend on the + # internals. 
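+			# (`o != o` is the portable NaN test: NaN is the only float value
+			# that compares unequal to itself, so no math.isnan import is needed.)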
+ + if o != o: + text = 'NaN' + elif o == _inf: + text = 'Infinity' + elif o == _neginf: + text = '-Infinity' + else: + return _repr(o) + + if not allow_nan: + raise ValueError( + "Out of range float values are not JSON compliant: " + + repr(o)) + + return text + + + _iterencode = _make_iterencode( + markers, self.default, _encoder, self.indent, floatstr, + self.key_separator, self.item_separator, self.sort_keys, + self.skipkeys, _one_shot) + return _iterencode(o, 0) + def json_date_time_encode(obj, primitives=False): """ @@ -480,3 +529,196 @@ def default(self, obj, *args, **kwargs): warnings.warn('`NoNumpyEncoder` is deprecated, use `nonumpy_encode`', JsonTricksDeprecation) obj = nonumpy_encode(obj) return super(NoNumpyEncoder, self).default(obj, *args, **kwargs) + +def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, + _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, + ## HACK: hand-optimized bytecode; turn globals into locals + ValueError=ValueError, + dict=dict, + float=float, + id=id, + int=int, + isinstance=isinstance, + list=list, + str=str, + tuple=tuple, + _intstr=int.__repr__, + ): + + try: + import numpy + def isfloatinstance(obj): + return isinstance(obj, float) and not isinstance(obj, numpy.number) + except ImportError: + def isfloatinstance(obj): + return isinstance(obj, float) + + if _indent is not None and not isinstance(_indent, str): + _indent = ' ' * _indent + + def _iterencode_list(lst, _current_indent_level): + if not lst: + yield '[]' + return + if markers is not None: + markerid = id(lst) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = lst + buf = '[' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + _indent * _current_indent_level + separator = _item_separator + newline_indent + buf += newline_indent + else: + newline_indent = None + separator = _item_separator + first = True + for value in lst: + if first: + first = False + else: + buf = separator + if isinstance(value, str): + yield buf + _encoder(value) + elif value is None: + yield buf + 'null' + elif value is True: + yield buf + 'true' + elif value is False: + yield buf + 'false' + elif isinstance(value, int): + # Subclasses of int/float may override __repr__, but we still + # want to encode them as integers/floats in JSON. One example + # within the standard library is IntEnum. 
+ yield buf + _intstr(value) + elif isfloatinstance(value): + # see comment above for int + yield buf + _floatstr(value) + else: + yield buf + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + yield from chunks + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + _indent * _current_indent_level + yield ']' + if markers is not None: + del markers[markerid] + + def _iterencode_dict(dct, _current_indent_level): + if not dct: + yield '{}' + return + if markers is not None: + markerid = id(dct) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = dct + yield '{' + if _indent is not None: + _current_indent_level += 1 + newline_indent = '\n' + _indent * _current_indent_level + item_separator = _item_separator + newline_indent + yield newline_indent + else: + newline_indent = None + item_separator = _item_separator + first = True + if _sort_keys: + items = sorted(dct.items()) + else: + items = dct.items() + for key, value in items: + if isinstance(key, str): + pass + # JavaScript is weakly typed for these, so it makes sense to + # also allow them. Many encoders seem to do something like this. + elif isinstance(key, float): + # see comment for int/float in _make_iterencode + key = _floatstr(key) + elif key is True: + key = 'true' + elif key is False: + key = 'false' + elif key is None: + key = 'null' + elif isinstance(key, int): + # see comment for int/float in _make_iterencode + key = _intstr(key) + elif _skipkeys: + continue + else: + raise TypeError(f'keys must be str, int, float, bool or None, ' + f'not {key.__class__.__name__}') + if first: + first = False + else: + yield item_separator + yield _encoder(key) + yield _key_separator + if isinstance(value, str): + yield _encoder(value) + elif value is None: + yield 'null' + elif value is True: + yield 'true' + elif value is False: + yield 'false' + elif isinstance(value, int): + # see comment for int/float in _make_iterencode + yield _intstr(value) + elif isfloatinstance(value): + # see comment for int/float in _make_iterencode + yield _floatstr(value) + else: + if isinstance(value, (list, tuple)): + chunks = _iterencode_list(value, _current_indent_level) + elif isinstance(value, dict): + chunks = _iterencode_dict(value, _current_indent_level) + else: + chunks = _iterencode(value, _current_indent_level) + yield from chunks + if newline_indent is not None: + _current_indent_level -= 1 + yield '\n' + _indent * _current_indent_level + yield '}' + if markers is not None: + del markers[markerid] + + def _iterencode(o, _current_indent_level): + if isinstance(o, str): + yield _encoder(o) + elif o is None: + yield 'null' + elif o is True: + yield 'true' + elif o is False: + yield 'false' + elif isinstance(o, int): + # see comment for int/float in _make_iterencode + yield _intstr(o) + elif isfloatinstance(o): + # see comment for int/float in _make_iterencode + yield _floatstr(o) + elif isinstance(o, (list, tuple)): + yield from _iterencode_list(o, _current_indent_level) + elif isinstance(o, dict): + yield from _iterencode_dict(o, _current_indent_level) + else: + if markers is not None: + markerid = id(o) + if markerid in markers: + raise ValueError("Circular reference detected") + markers[markerid] = o + o = _default(o) + yield from _iterencode(o, _current_indent_level) + if markers is not 
None: + del markers[markerid] + return _iterencode From 5df1d114b9bdcd98789e2c0849d05d3164108109 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Tue, 17 Sep 2024 12:54:39 -0400 Subject: [PATCH 03/16] Update README with fork information update name in README --- README.md | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index e05c663..494b8fc 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ -# JSON tricks (python) +> [!NOTE] +>The primary reason for this fork is to enable full round-trip serialization and deserialization of NumPy scalars and 0-dimensional arrays to JSON and back. This feature is essential for applications that require precise data preservation when working with NumPy data types. + +Despite contributing this enhancement to the original project (see [Pull Request #99](https://github.com/mverleg/pyjson_tricks/pull/99)), there was a difference in opinion with the maintainer regarding its inclusion. As a result, this fork aims to continue development with this functionality integrated. + +# ro_json The [pyjson-tricks] package brings several pieces of functionality to python handling of json files: @@ -31,7 +36,7 @@ Thanks for all the Github stars⭐! You can install using ``` bash -pip install json-tricks +pip install ro_json ``` Decoding of some data types needs the corresponding package to be @@ -75,7 +80,7 @@ this yields: ``` which will be converted back to a numpy array when using -`json_tricks.loads`. Note that the memory order (`Corder`) is only +`ro_json.loads`. Note that the memory order (`Corder`) is only stored in v3.1 and later and for arrays with at least 2 dimensions. As you see, this uses the magic key `__ndarray__`. Don't use @@ -87,9 +92,9 @@ closest python primitive type. A special representation was not feasible, because Python's json implementation serializes some numpy types as primitives, without consulting custom encoders. If you want to preserve the exact numpy type, use -[encode_scalars_inplace](https://json-tricks.readthedocs.io/en/latest/#json_tricks.np_utils.encode_scalars_inplace). +[encode_scalars_inplace](https://json-tricks.readthedocs.io/en/latest/#ro_json.np_utils.encode_scalars_inplace). -There is also a compressed format (thanks `claydugo` for fix). From +There is also a compressed format (thanks `claydugo` for fix). From the next major release, this will be default when using compression. For now, you can use it as: @@ -122,14 +127,14 @@ dumps(data, compression=False, properties={'ndarray_compact': 8}) ## Class instances -`json_tricks` can serialize class instances. +`ro_json` can serialize class instances. If the class behaves normally (not generated dynamic, no `__new__` or `__metaclass__` magic, etc) *and* all it's attributes are serializable, then this should work by default. ``` python -# json_tricks/test_class.py +# ro_json/test_class.py class MyTestCls: def __init__(self, **kwargs): for k, v in kwargs.items(): @@ -146,7 +151,7 @@ You'll get your instance back. Here the json looks like this: ``` javascript { "__instance_type__": [ - "json_tricks.test_class", + "ro_json.test_class", "MyTestCls" ], "attributes": { @@ -211,7 +216,7 @@ Date, time, datetime and timedelta objects are stored as dictionaries of "day", "hour", "millisecond" etc keys, for each nonzero property. Timezone name is also stored in case it is set, as is DST (thanks `eumir`). 
-You'll need to have `pytz` installed to use timezone-aware date/times, +You'll need to have `pytz` installed to use timezone-aware date/times, it's not needed for naive date/times. ``` javascript @@ -303,12 +308,12 @@ Since comments aren't stored in the Python representation of the data, loading and then saving a json file will remove the comments (it also likely changes the indentation). -The implementation of comments is a bit crude, which means that there are +The implementation of comments is a bit crude, which means that there are some exceptional cases that aren't handled correctly ([#57](https://github.com/mverleg/pyjson_tricks/issues/57)). -It is also not very fast. For that reason, if `ignore_comments` wasn't -explicitly set to True, then json-tricks first tries to parge without -ignoring comments. If that fails, then it will automatically re-try +It is also not very fast. For that reason, if `ignore_comments` wasn't +explicitly set to True, then ro_json first tries to parse without +ignoring comments. If that fails, then it will automatically re-try with comment handling. This makes the no-comment case faster at the cost of the comment case, so if you are expecting comments make sure to set `ignore_comments` to True. @@ -328,10 +333,10 @@ of the comment case, so if you are expecting comments make sure to set * Save and load `Enum` (thanks to `Jenselme`), either built-in in python3.4+, or with the [enum34](https://pypi.org/project/enum34/) package in earlier versions. `IntEnum` needs - [encode_intenums_inplace](https://json-tricks.readthedocs.io/en/latest/#json_tricks.utils.encode_intenums_inplace). -* `json_tricks` allows for gzip compression using the + [encode_intenums_inplace](https://json-tricks.readthedocs.io/en/latest/#ro_json.utils.encode_intenums_inplace). +* `ro_json` allows for gzip compression using the `compression=True` argument (off by default). -* `json_tricks` can check for duplicate keys in maps by setting +* `ro_json` can check for duplicate keys in maps by setting `allow_duplicates` to False. These are [kind of allowed](http://stackoverflow.com/questions/21832701/does-json-syntax-allow-duplicate-keys-in-an-object), but are handled inconsistently between json implementations. In @@ -340,8 +345,8 @@ of the comment case, so if you are expecting comments make sure to set * Save and load `pathlib.Path` objects (e.g., the current path, `Path('.')`, serializes as `{"__pathlib__": "."}`) (thanks to `bburan`). -* Save and load bytes (python 3+ only), which will be encoded as utf8 if - that is valid, or as base64 otherwise. Base64 is always used if +* Save and load bytes (python 3+ only), which will be encoded as utf8 if + that is valid, or as base64 otherwise. Base64 is always used if primitives are requested. Serialized as `[{"__bytes_b64__": "aGVsbG8="}]` vs `[{"__bytes_utf8__": "hello"}]`. * Save and load slices (thanks to `claydugo`). @@ -455,9 +460,9 @@ print(dumps(data, primitives=True)) ] ``` -Note that valid json is produced either way: ``json-tricks`` stores meta data as normal json, but other packages probably won't interpret it. +Note that valid json is produced either way: ``ro_json`` stores meta data as normal json, but other packages probably won't interpret it. -Note that valid json is produced either way: `json-tricks` stores meta +Note that valid json is produced either way: `ro_json` stores meta data as normal json, but other packages probably won't interpret it. 
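For numpy scalars and 0-dimensional arrays, the default (non-primitive) mode is what lets the exact numpy type survive a round trip, which is the main motivation for this fork. A minimal sketch of that behaviour, assuming numpy is installed (the scalar round-trip tests in `tests/test_np.py` cover the full set of dtypes):

``` python
import numpy as np
from ro_json import dumps, loads

value = np.float32(1.5)
restored = loads(dumps(value))

# The dtype is preserved instead of collapsing to a plain Python float.
assert isinstance(restored, np.float32)
assert restored == value
```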
# Usage & contributions @@ -477,6 +482,4 @@ Contributors not yet mentioned: `janLo` (performance boost). Tests are run automatically for commits to the repository for all supported versions. This is the status: -![image](https://github.com/mverleg/pyjson_tricks/workflows/pyjson-tricks/badge.svg?branch=master) - -To run the tests manually for your version, see [this guide](tests/run_locally.md). \ No newline at end of file +To run the tests manually for your version, see [this guide](tests/run_locally.md). From a28cd99d8d9d15c6cb4d8fd17b4c3e80190d6fc9 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Tue, 17 Sep 2024 13:36:12 -0400 Subject: [PATCH 04/16] Update json_tricks references to ro_json in .rst files, docs, and README --- .git-blame-ignore-revs | 1 + README.md | 4 ++-- docs/index.rst | 34 ++++++++++++++-------------- {json_tricks => ro_json}/__init__.py | 0 {json_tricks => ro_json}/_version.py | 0 {json_tricks => ro_json}/comment.py | 0 {json_tricks => ro_json}/decoders.py | 2 +- {json_tricks => ro_json}/encoders.py | 2 +- {json_tricks => ro_json}/nonp.py | 10 ++++---- {json_tricks => ro_json}/np.py | 4 ++-- {json_tricks => ro_json}/np_utils.py | 0 {json_tricks => ro_json}/utils.py | 2 +- setup.py | 2 +- tests/test_bare.py | 6 ++--- tests/test_class.py | 2 +- tests/test_enum.py | 4 ++-- tests/test_meta.py | 4 ++-- tests/test_np.py | 8 +++---- tests/test_pandas.py | 2 +- tests/test_pathlib.py | 2 +- tests/test_slice.py | 2 +- tests/test_tz.py | 4 ++-- tests/test_utils.py | 2 +- 23 files changed, 49 insertions(+), 48 deletions(-) create mode 100644 .git-blame-ignore-revs rename {json_tricks => ro_json}/__init__.py (100%) rename {json_tricks => ro_json}/_version.py (100%) rename {json_tricks => ro_json}/comment.py (100%) rename {json_tricks => ro_json}/decoders.py (99%) rename {json_tricks => ro_json}/encoders.py (99%) rename {json_tricks => ro_json}/nonp.py (97%) rename {json_tricks => ro_json}/np.py (82%) rename {json_tricks => ro_json}/np_utils.py (100%) rename {json_tricks => ro_json}/utils.py (99%) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..0d6bae9 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1 @@ +2fac87779b90f895fba58103efd9d1b1c101a722 diff --git a/README.md b/README.md index 494b8fc..ddfc5a3 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ You can import the usual json functions dump(s) and load(s), as well as a separate comment removal function, as follows: ``` bash -from json_tricks import dump, dumps, load, loads, strip_comments +from ro_json import dump, dumps, load, loads, strip_comments ``` The exact signatures of these and other functions are in the [documentation](http://json-tricks.readthedocs.org/en/latest/#main-components). @@ -258,7 +258,7 @@ ordered = OrderedDict(( Converting to json and back will preserve the order: ``` python -from json_tricks import dumps, loads +from ro_json import dumps, loads json = dumps(ordered) ordered = loads(json, preserve_order=True) ``` diff --git a/docs/index.rst b/docs/index.rst index dc4f7c1..78359ff 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,30 +9,30 @@ Support for numpy, pandas and other libraries should work automatically if those dumps +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.nonp.dumps +.. autofunction:: ro_json.nonp.dumps -.. autofunction:: json_tricks.np.dumps +.. autofunction:: ro_json.np.dumps dump +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.nonp.dump +.. 
autofunction:: ro_json.nonp.dump -.. autofunction:: json_tricks.np.dump +.. autofunction:: ro_json.np.dump loads +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.nonp.loads +.. autofunction:: ro_json.nonp.loads -.. autofunction:: json_tricks.np.loads +.. autofunction:: ro_json.np.loads load +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.nonp.load +.. autofunction:: ro_json.nonp.load -.. autofunction:: json_tricks.np.load +.. autofunction:: ro_json.np.load Utilities --------------------------------------- @@ -40,19 +40,19 @@ Utilities strip comments +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.comment.strip_comments +.. autofunction:: ro_json.comment.strip_comments numpy +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.np.numpy_encode +.. autofunction:: ro_json.np.numpy_encode -.. autofunction:: json_tricks.np.json_numpy_obj_hook +.. autofunction:: ro_json.np.json_numpy_obj_hook class instances +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.encoders.class_instance_encode +.. autofunction:: ro_json.encoders.class_instance_encode .. autoclass:: json_tricks.decoders.ClassInstanceHook @@ -61,20 +61,20 @@ enum instances Support for enums was added in Python 3.4. Support for previous versions of Python is available with the `enum 34`_ package. -.. autofunction:: json_tricks.encoders.enum_instance_encode +.. autofunction:: ro_json.encoders.enum_instance_encode .. autoclass:: json_tricks.decoders.EnumInstanceHook By default ``IntEnum`` cannot be encoded as enums since they cannot be differenciated from integers. To serialize them, you must use `encode_intenums_inplace` which mutates a nested data structure (in place!) to replace any ``IntEnum`` by their representation. If you serialize this result, it can subsequently be loaded without further adaptations. -.. autofunction:: json_tricks.utils.encode_intenums_inplace +.. autofunction:: ro_json.utils.encode_intenums_inplace date/time +++++++++++++++++++++++++++++++++++++++ -.. autofunction:: json_tricks.encoders.json_date_time_encode +.. autofunction:: ro_json.encoders.json_date_time_encode -.. autofunction:: json_tricks.decoders.json_date_time_hook +.. autofunction:: ro_json.decoders.json_date_time_hook numpy scalars +++++++++++++++++++++++++++++++++++++++ @@ -85,7 +85,7 @@ So if you really want to encode numpy scalars, you'll have to do the conversion It's not great, but unless the Python json module changes, it's the best that can be done. See `issue 18`_ for more details. -.. autofunction:: json_tricks.np_utils.encode_scalars_inplace +.. 
autofunction:: ro_json.np_utils.encode_scalars_inplace Table of content --------------------------------------- diff --git a/json_tricks/__init__.py b/ro_json/__init__.py similarity index 100% rename from json_tricks/__init__.py rename to ro_json/__init__.py diff --git a/json_tricks/_version.py b/ro_json/_version.py similarity index 100% rename from json_tricks/_version.py rename to ro_json/_version.py diff --git a/json_tricks/comment.py b/ro_json/comment.py similarity index 100% rename from json_tricks/comment.py rename to ro_json/comment.py diff --git a/json_tricks/decoders.py b/ro_json/decoders.py similarity index 99% rename from json_tricks/decoders.py rename to ro_json/decoders.py index 67336a1..c41c525 100644 --- a/json_tricks/decoders.py +++ b/ro_json/decoders.py @@ -6,7 +6,7 @@ from decimal import Decimal from fractions import Fraction -from json_tricks import NoEnumException, NoPandasException, NoNumpyException +from ro_json import NoEnumException, NoPandasException, NoNumpyException from .utils import ClassInstanceHookBase, nested_index, str_type, gzip_decompress, filtered_wrapper diff --git a/json_tricks/encoders.py b/ro_json/encoders.py similarity index 99% rename from json_tricks/encoders.py rename to ro_json/encoders.py index eb38317..08dcaf2 100644 --- a/json_tricks/encoders.py +++ b/ro_json/encoders.py @@ -442,7 +442,7 @@ def numpy_encode(obj, primitives=False, properties=None): numpy_encode._warned_compact = True warnings.warn('storing ndarray in text format while compression in enabled; in the next major version ' 'of json_tricks, the default when using compression will change to compact mode; to already use ' - 'that smaller format, pass `properties={"ndarray_compact": True}` to json_tricks.dump; ' + 'that smaller format, pass `properties={"ndarray_compact": True}` to ro_json.dump; ' 'to silence this warning, pass `properties={"ndarray_compact": False}`; ' 'see issue https://github.com/mverleg/pyjson_tricks/issues/73', JsonTricksDeprecation) # Property 'use_compact' may also be an integer, in which case it's the number of diff --git a/json_tricks/nonp.py b/ro_json/nonp.py similarity index 97% rename from json_tricks/nonp.py rename to ro_json/nonp.py index d00d754..d779833 100644 --- a/json_tricks/nonp.py +++ b/ro_json/nonp.py @@ -3,7 +3,7 @@ from os import fsync from sys import exc_info -from json_tricks.utils import is_py3, dict_default, gzip_compress, gzip_decompress, JsonTricksDeprecation +from ro_json.utils import is_py3, dict_default, gzip_compress, gzip_decompress, JsonTricksDeprecation from .utils import str_type, NoNumpyException # keep 'unused' imports from .comment import strip_comments # keep 'unused' imports #TODO @mark: imports removed? @@ -111,7 +111,7 @@ def dumps(obj, sort_keys=None, cls=None, obj_encoders=DEFAULT_ENCODERS, extra_ob Other arguments are passed on to `cls`. Note that `sort_keys` should be false if you want to preserve order. """ if not hasattr(extra_obj_encoders, '__iter__'): - raise TypeError('`extra_obj_encoders` should be a tuple in `json_tricks.dump(s)`') + raise TypeError('`extra_obj_encoders` should be a tuple in `ro_json.dump(s)`') encoders = tuple(extra_obj_encoders) + tuple(obj_encoders) properties = properties or {} dict_default(properties, 'primitives', primitives) @@ -217,7 +217,7 @@ def loads(string, preserve_order=True, ignore_comments=None, decompression=None, Other arguments are passed on to json_func. 
""" if not hasattr(extra_obj_pairs_hooks, '__iter__'): - raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `json_tricks.load(s)`') + raise TypeError('`extra_obj_pairs_hooks` should be a tuple in `ro_json.load(s)`') if decompression is None: decompression = isinstance(string, bytes) and string[:2] == b'\x1f\x8b' if decompression: @@ -226,7 +226,7 @@ def loads(string, preserve_order=True, ignore_comments=None, decompression=None, if conv_str_byte: string = string.decode(ENCODING) else: - raise TypeError(('The input was of non-string type "{0:}" in `json_tricks.load(s)`. ' + raise TypeError(('The input was of non-string type "{0:}" in `ro_json.load(s)`. ' 'Bytes cannot be automatically decoding since the encoding is not known. Recommended ' 'way is to instead encode the bytes to a string and pass that string to `load(s)`, ' 'for example bytevar.encode("utf-8") if utf-8 is the encoding. Alternatively you can ' @@ -248,7 +248,7 @@ def loads(string, preserve_order=True, ignore_comments=None, decompression=None, # if this fails, re-try parsing after stripping comments result = _strip_loads(string, hook, True, **jsonkwargs) if not getattr(loads, '_ignore_comments_warned', False): - warnings.warn('`json_tricks.load(s)` stripped some comments, but `ignore_comments` was ' + warnings.warn('`ro_json.load(s)` stripped some comments, but `ignore_comments` was ' 'not passed; in the next major release, the behaviour when `ignore_comments` is not ' 'passed will change; it is recommended to explicitly pass `ignore_comments=True` if ' 'you want to strip comments; see https://github.com/mverleg/pyjson_tricks/issues/74', diff --git a/json_tricks/np.py b/ro_json/np.py similarity index 82% rename from json_tricks/np.py rename to ro_json/np.py index 5f269c9..1fbec8a 100644 --- a/json_tricks/np.py +++ b/ro_json/np.py @@ -16,10 +16,10 @@ import numpy except ImportError: raise NoNumpyException('Could not load numpy, maybe it is not installed? If you do not want to use numpy encoding ' - 'or decoding, you can import the functions from json_tricks.nonp instead, which do not need numpy.') + 'or decoding, you can import the functions from ro_json.nonp instead, which do not need numpy.') -warnings.warn('`json_tricks.np` is deprecated, you can import directly from `json_tricks`', JsonTricksDeprecation) +warnings.warn('`ro_json.np` is deprecated, you can import directly from `json_tricks`', JsonTricksDeprecation) DEFAULT_NP_ENCODERS = [numpy_encode,] + DEFAULT_ENCODERS # DEPRECATED diff --git a/json_tricks/np_utils.py b/ro_json/np_utils.py similarity index 100% rename from json_tricks/np_utils.py rename to ro_json/np_utils.py diff --git a/json_tricks/utils.py b/ro_json/utils.py similarity index 99% rename from json_tricks/utils.py rename to ro_json/utils.py index b6e08b8..ca68a74 100644 --- a/json_tricks/utils.py +++ b/ro_json/utils.py @@ -153,7 +153,7 @@ def encode_intenums_inplace(obj): by json-tricks. This happens in-place (the object is changed, use a copy). 
""" from enum import IntEnum - from json_tricks import encoders + from ro_json import encoders if isinstance(obj, IntEnum): return encoders.enum_instance_encode(obj) if isinstance(obj, dict): diff --git a/setup.py b/setup.py index 32b2c45..e9c65e2 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ # with open('json_tricks/_version.py', 'r') as fh: # version = fh.read().strip() -from json_tricks._version import VERSION +from ro_json._version import VERSION requires = [] if version_info < (2, 7, 0): diff --git a/tests/test_bare.py b/tests/test_bare.py index a3c67c9..ea09d74 100644 --- a/tests/test_bare.py +++ b/tests/test_bare.py @@ -15,10 +15,10 @@ import pytest from pytest import raises, fail, warns -from json_tricks import fallback_ignore_unknown, DuplicateJsonKeyException -from json_tricks.nonp import strip_comments, dump, dumps, load, loads, \ +from ro_json import fallback_ignore_unknown, DuplicateJsonKeyException +from ro_json.nonp import strip_comments, dump, dumps, load, loads, \ ENCODING -from json_tricks.utils import is_py3, gzip_compress, JsonTricksDeprecation, str_type +from ro_json.utils import is_py3, gzip_compress, JsonTricksDeprecation, str_type from .test_class import MyTestCls, CustomEncodeCls, SubClass, SuperClass, SlotsBase, SlotsDictABC, SlotsStr, \ SlotsABCDict, SlotsABC diff --git a/tests/test_class.py b/tests/test_class.py index 590a8c9..692413b 100644 --- a/tests/test_class.py +++ b/tests/test_class.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- import weakref -from json_tricks import dumps, loads +from ro_json import dumps, loads class MyTestCls(object): diff --git a/tests/test_enum.py b/tests/test_enum.py index fa0aea6..4d2cd55 100644 --- a/tests/test_enum.py +++ b/tests/test_enum.py @@ -5,8 +5,8 @@ from datetime import datetime from functools import partial from enum import Enum, IntEnum -from json_tricks import dumps, loads, encode_intenums_inplace -from json_tricks.encoders import enum_instance_encode +from ro_json import dumps, loads, encode_intenums_inplace +from ro_json.encoders import enum_instance_encode PY2 = sys.version_info[0] == 2 diff --git a/tests/test_meta.py b/tests/test_meta.py index 89d2794..1f76fce 100644 --- a/tests/test_meta.py +++ b/tests/test_meta.py @@ -3,5 +3,5 @@ def test_version(): - import json_tricks - assert re.match(r'^\d+\.\d+\.\d+$', json_tricks.__version__) is not None + import ro_json + assert re.match(r'^\d+\.\d+\.\d+$', ro_json.__version__) is not None diff --git a/tests/test_np.py b/tests/test_np.py index 675ee0e..031c019 100644 --- a/tests/test_np.py +++ b/tests/test_np.py @@ -17,10 +17,10 @@ from numpy.core.umath import exp from numpy.testing import assert_equal -from json_tricks import numpy_encode -from json_tricks.np import dump, dumps, load, loads -from json_tricks.np_utils import encode_scalars_inplace -from json_tricks.utils import JsonTricksDeprecation, gzip_decompress +from ro_json import numpy_encode +from ro_json.np import dump, dumps, load, loads +from ro_json.np_utils import encode_scalars_inplace +from ro_json.utils import JsonTricksDeprecation, gzip_decompress from .test_bare import cls_instance from .test_class import MyTestCls diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 5c3d19c..8eb5ee0 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -5,7 +5,7 @@ from numpy import linspace, isnan from numpy.testing import assert_equal from pandas import DataFrame, Series -from json_tricks import dumps, loads +from ro_json import dumps, loads from tests.test_bare import nonpdata diff --git 
a/tests/test_pathlib.py b/tests/test_pathlib.py index fb0dbbf..f928e82 100644 --- a/tests/test_pathlib.py +++ b/tests/test_pathlib.py @@ -7,7 +7,7 @@ from pathlib import Path -from json_tricks import dumps, loads +from ro_json import dumps, loads # These paths are not necessarily actual paths that exist, but are sufficient diff --git a/tests/test_slice.py b/tests/test_slice.py index 9ded28f..1cf6d00 100644 --- a/tests/test_slice.py +++ b/tests/test_slice.py @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- from pathlib import Path -from json_tricks import dumps, loads +from ro_json import dumps, loads def test_slice(): original_slice = slice(0, 10, 2) diff --git a/tests/test_tz.py b/tests/test_tz.py index 0f36a51..cd9bc84 100644 --- a/tests/test_tz.py +++ b/tests/test_tz.py @@ -7,8 +7,8 @@ """ from datetime import datetime, date, time, timedelta, timezone -from json_tricks import dumps, loads -from json_tricks.utils import is_py3 +from ro_json import dumps, loads +from ro_json.utils import is_py3 import pytz diff --git a/tests/test_utils.py b/tests/test_utils.py index e53cc8c..0bba894 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -from json_tricks.utils import hashodict, get_arg_names, nested_index +from ro_json.utils import hashodict, get_arg_names, nested_index def test_hashodict(): From 121d6f57afd887b07367815911f258f27ec74421 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Tue, 17 Sep 2024 13:42:48 -0400 Subject: [PATCH 05/16] Add package rename commit to .git-blame-ignore-revs --- .git-blame-ignore-revs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 0d6bae9..5c15934 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1 +1 @@ -2fac87779b90f895fba58103efd9d1b1c101a722 +40e5de5211ff6967c6e14f47fd62b3add5997d5f From ec536c7888799dff3c7a876141ea910f73618619 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Tue, 17 Sep 2024 13:45:44 -0400 Subject: [PATCH 06/16] Update setup.py for ro_json --- setup.py | 115 +++++++++++++++++++++++++------------------------------ 1 file changed, 52 insertions(+), 63 deletions(-) diff --git a/setup.py b/setup.py index e9c65e2..255ca47 100644 --- a/setup.py +++ b/setup.py @@ -1,69 +1,58 @@ -# -*- coding: utf-8 -*- +from setuptools import setup, find_packages -from sys import version_info -import warnings +with open('README.md', 'r', encoding='utf-8') as fh: + readme = fh.read() -from setuptools import setup - -with open('README.md', 'r') as fh: - readme = fh.read() - -# with open('json_tricks/_version.py', 'r') as fh: -# version = fh.read().strip() from ro_json._version import VERSION -requires = [] -if version_info < (2, 7, 0): - requires.append('ordereddict') - -if (version_info[0] == 2 and version_info[1] < 7) or \ - (version_info[0] == 3 and version_info[1] < 4) or \ - version_info[0] not in (2, 3): - raise warnings.warn('`json_tricks` does not support Python version {}.{}' - .format(version_info[0], version_info[1])) - setup( - name='json_tricks', - description='Extra features for Python\'s JSON: comments, order, numpy, ' - 'pandas, datetimes, and many more! 
Simple but customizable.', - long_description_content_type='text/markdown', - long_description=readme, - url='https://github.com/mverleg/pyjson_tricks', - author='Mark V', - maintainer='Mark V', - author_email='markv.nl.dev@gmail.com', - license='Revised BSD License (LICENSE.txt)', - keywords=['json', 'numpy', 'OrderedDict', 'comments', 'pandas', 'pytz', - 'enum', 'encode', 'decode', 'serialize', 'deserialize'], - version=VERSION, - packages=['json_tricks'], - package_data=dict( - json_tricks=['LICENSE.txt', 'README.md', 'VERSION'], - # tests=['tests/*.py'], - ), - # include_package_data=True, - zip_safe=True, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Development Status :: 6 - Mature', - 'Intended Audience :: Developers', - 'Natural Language :: English', - 'License :: OSI Approved :: BSD License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy', - 'Topic :: Software Development :: Libraries :: Python Modules', - # 'Topic :: Utilities', - ], - install_requires=requires, + name='ro_json', + version=VERSION, + description='Extra features for Python\'s JSON: comments, order, numpy, ' + 'pandas, datetimes, and many more! Simple but customizable.', + long_description=readme, + long_description_content_type='text/markdown', + url='https://github.com/ramonaoptics/ro_json', + author='Clay Dugo', + author_email='clay@ramonaoptics.com', + license='BSD-3-Clause', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Developers', + 'Natural Language :: English', + 'License :: OSI Approved :: BSD License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', + 'Programming Language :: Python :: Implementation :: CPython', + 'Programming Language :: Python :: Implementation :: PyPy', + 'Topic :: Software Development :: Libraries :: Python Modules', + ], + keywords=[ + 'json', + 'numpy', + 'OrderedDict', + 'comments', + 'pandas', + 'pytz', + 'enum', + 'encode', + 'decode', + 'serialize', + 'deserialize', + 'roundtrip', + ], + packages=find_packages(exclude=["tests*"]), + include_package_data=True, + install_requires=[], + python_requires='>=3.10', + project_urls={ + # 'Documentation': 'https://your-package-docs-url', + 'Source': 'https://github.com/ramonaoptics/ro_json', + 'Tracker': 'https://github.com/ramonaoptics/ro_json/issues', + }, + license_files=('LICENSE.txt',), ) From a65d4046d7483b6eff53f6e9ab1358d35fdbe730 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Tue, 17 Sep 2024 14:12:01 -0400 Subject: [PATCH 07/16] Add CI to release to PyPI Change release to indicate 1.0.0 for fork --- .github/workflows/pypi.yaml | 40 +++++++++++++++++++++++++++++++++++++ ro_json/_version.py | 3 +-- 2 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/pypi.yaml diff --git 
a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml new file mode 100644 index 0000000..1a4a29d --- /dev/null +++ b/.github/workflows/pypi.yaml @@ -0,0 +1,40 @@ +# https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ +name: Publish Python 🐍 distributions πŸ“¦ to PyPI and TestPyPI + +on: + push: + tags: + - '*' + +permissions: + id-token: write + contents: read + +jobs: + build-n-publish: + name: Build and Publish Python 🐍 Distributions πŸ“¦ to PyPI and TestPyPI + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install build + run: python -m pip install --upgrade pip build + + - name: Build Distributions + run: python -m build --sdist --wheel --outdir dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + print-hash: true + verify-metadata: true diff --git a/ro_json/_version.py b/ro_json/_version.py index 8b30903..285aa97 100644 --- a/ro_json/_version.py +++ b/ro_json/_version.py @@ -1,3 +1,2 @@ - -VERSION = '3.17.2' +VERSION = '1.0.0' From d533ed7c1dceb1949d4be269c68138d9cc757e0d Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Tue, 17 Sep 2024 14:22:00 -0400 Subject: [PATCH 08/16] Update tests to remove testing older python versions --- .github/workflows/tests.yml | 71 +++++++++---------------------------- 1 file changed, 16 insertions(+), 55 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3b84922..5c0f1b9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,5 +1,4 @@ - -name: 'pyjson-tricks' +name: ro_json on: push: @@ -9,71 +8,33 @@ on: jobs: build: - name: tests + name: Tests on Python ${{ matrix.python-version }} runs-on: ubuntu-latest + strategy: - max-parallel: 8 - fail-fast: false matrix: - libraries: [ - 'vanilla', - 'tz', - 'path', - 'numpy', - 'pandas', - 'all' - ] python-version: [ - '3.7', - '3.8', - '3.9', - '3.10', - '3.11' + '3.10', + '3.11', + '3.12' ] + max-parallel: 8 + fail-fast: false + steps: - - uses: actions/checkout@v1 + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} + - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest - if [ "${{ matrix.python-version }}" == "2.7" ] ; then - pip install enum34 - fi - export LIBS="${{ matrix.libraries }}" - if [ "$LIBS" == "tz" ] || [ "$LIBS" == "all" ] ; then - pip install pytz - fi - if [ "$LIBS" == "path" ] || [ "$LIBS" == "all" ] ; then - pip install pathlib - fi - if [ "$LIBS" == "numpy" ] || [ "$LIBS" == "all" ] ; then - pip install numpy - fi - if [ "$LIBS" == "pandas" ] || [ "$LIBS" == "all" ] ; then - pip install pandas - fi + pip install pytest pytz pathlib numpy pandas + - name: Run tests run: | python --version - PYTEST_ARGS='-v --strict tests/test_bare.py tests/test_class.py tests/test_meta.py tests/test_enum.py' - export LIBS="${{ matrix.libraries }}" - if [ "$LIBS" == "vanilla" ] ; then - py.test $PYTEST_ARGS - elif [ "$LIBS" == "tz" ] ; then - py.test $PYTEST_ARGS tests/test_tz.py - elif [ "$LIBS" == "path" ] ; then - py.test $PYTEST_ARGS tests/test_pathlib.py - elif [ "$LIBS" == "numpy" ] ; then - py.test $PYTEST_ARGS tests/test_np.py - elif [ 
"$LIBS" == "pandas" ] ; then - py.test $PYTEST_ARGS tests/test_pandas.py - elif [ "$LIBS" == "all" ] ; then - py.test -v --strict - else - echo "UNKNOWN LIBRARY '$LIBS'" - exit 1 - fi + pytest -v --strict From 1584f97d571de9ba6ab0e0e6b397129b8541c15c Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Tue, 17 Sep 2024 14:51:18 -0400 Subject: [PATCH 09/16] Add range object serialization --- ro_json/decoders.py | 7 +++++++ ro_json/encoders.py | 14 ++++++++++++++ ro_json/nonp.py | 6 ++++-- tests/test_range.py | 13 +++++++++++++ 4 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 tests/test_range.py diff --git a/ro_json/decoders.py b/ro_json/decoders.py index c41c525..ad9e8b0 100644 --- a/ro_json/decoders.py +++ b/ro_json/decoders.py @@ -159,6 +159,13 @@ def slice_hook(dct): return dct return slice(dct['start'], dct['stop'], dct['step']) +def range_hook(dct): + if not isinstance(dct, dict): + return dct + if not '__range__' in dct: + return dct + return range(dct['start'], dct['stop'], dct['step']) + class EnumInstanceHook(ClassInstanceHookBase): """ diff --git a/ro_json/encoders.py b/ro_json/encoders.py index 08dcaf2..32094c6 100644 --- a/ro_json/encoders.py +++ b/ro_json/encoders.py @@ -347,6 +347,20 @@ def slice_encode(obj, primitives=False): ('step', obj.step), )) +def range_encode(obj, primitives=False): + if not isinstance(obj, range): + return obj + + if primitives: + return [obj.start, obj.stop, obj.step] + else: + return hashodict(( + ('__range__', True), + ('start', obj.start), + ('stop', obj.stop), + ('step', obj.step), + )) + class ClassInstanceEncoder(JSONEncoder): """ See `class_instance_encoder`. diff --git a/ro_json/nonp.py b/ro_json/nonp.py index d779833..1dcef83 100644 --- a/ro_json/nonp.py +++ b/ro_json/nonp.py @@ -10,13 +10,13 @@ from .encoders import TricksEncoder, json_date_time_encode, \ class_instance_encode, json_complex_encode, json_set_encode, numeric_types_encode, numpy_encode, \ nonumpy_encode, nopandas_encode, pandas_encode, noenum_instance_encode, \ - enum_instance_encode, pathlib_encode, bytes_encode, slice_encode # keep 'unused' imports + enum_instance_encode, pathlib_encode, bytes_encode, slice_encode, range_encode # keep 'unused' imports from .decoders import TricksPairHook, \ json_date_time_hook, ClassInstanceHook, \ json_complex_hook, json_set_hook, numeric_types_hook, json_numpy_obj_hook, \ json_nonumpy_obj_hook, \ nopandas_hook, pandas_hook, EnumInstanceHook, \ - noenum_hook, pathlib_hook, nopathlib_hook, json_bytes_hook, slice_hook # keep 'unused' imports + noenum_hook, pathlib_hook, nopathlib_hook, json_bytes_hook, slice_hook, range_hook # keep 'unused' imports ENCODING = 'UTF-8' @@ -33,6 +33,7 @@ class_instance_encode, bytes_encode, slice_encode, + range_encode, ] DEFAULT_HOOKS = [ @@ -43,6 +44,7 @@ _cih_instance, json_bytes_hook, slice_hook, + range_hook, ] diff --git a/tests/test_range.py b/tests/test_range.py new file mode 100644 index 0000000..6b8c22c --- /dev/null +++ b/tests/test_range.py @@ -0,0 +1,13 @@ +from ro_json import dumps, loads + +def test_range(): + original_range = range(0, 10, 2) + json_range = dumps(original_range) + loaded_range = loads(json_range) + assert original_range == loaded_range + +def test_range_no_step(): + original_range = range(0, 5) + json_range = dumps(original_range) + loaded_range = loads(json_range) + assert original_range == loaded_range From b623e99cd6337a199fc06b843f4cacceb6751711 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 17 Sep 2024 17:58:57 -0400 Subject: [PATCH 10/16] Rename to ro 
dash json --- README.md | 10 +++++----- setup.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index ddfc5a3..4c3be4e 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Despite contributing this enhancement to the original project (see [Pull Request # ro_json -The [pyjson-tricks] package brings several pieces of +The [ro-json] package brings several pieces of functionality to python handling of json files: 1. **Store and load numpy arrays** in human-readable format. @@ -18,9 +18,9 @@ functionality to python handling of json files: As well as compression and disallowing duplicate keys. -* Code: -* Documentation: -* PIP: +* Code: + +* PIP: Several keys of the format `__keyname__` have special meanings, and more might be added in future releases. @@ -354,7 +354,7 @@ of the comment case, so if you are expecting comments make sure to set # Preserve type vs use primitive By default, types are encoded such that they can be restored to their -original type when loaded with `json-tricks`. Example encodings in this +original type when loaded with `ro-json`. Example encodings in this documentation refer to that format. You can also choose to store things as their closest primitive type diff --git a/setup.py b/setup.py index 255ca47..19ddbb4 100644 --- a/setup.py +++ b/setup.py @@ -6,13 +6,13 @@ from ro_json._version import VERSION setup( - name='ro_json', + name='ro-json', version=VERSION, description='Extra features for Python\'s JSON: comments, order, numpy, ' 'pandas, datetimes, and many more! Simple but customizable.', long_description=readme, long_description_content_type='text/markdown', - url='https://github.com/ramonaoptics/ro_json', + url='https://github.com/ramonaoptics/ro-json', author='Clay Dugo', author_email='clay@ramonaoptics.com', license='BSD-3-Clause', From d6b01a3484860938fcdf237d03bfc66a0e31d694 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 17 Sep 2024 18:07:35 -0400 Subject: [PATCH 11/16] Get miniver installed --- .gitattributes | 1 + .github/workflows/tests.yml | 24 +++-- ro_json/__init__.py | 5 +- ro_json/_static_version.py | 12 +++ ro_json/_version.py | 194 +++++++++++++++++++++++++++++++++++- setup.py | 20 +++- tests/test_meta.py | 11 +- 7 files changed, 247 insertions(+), 20 deletions(-) create mode 100644 .gitattributes create mode 100644 ro_json/_static_version.py diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b78365c --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +ro_json/_static_version.py export-subst \ No newline at end of file diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5c0f1b9..0483d0c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: ro_json +name: Testing on: push: @@ -10,20 +10,24 @@ jobs: build: name: Tests on Python ${{ matrix.python-version }} runs-on: ubuntu-latest - strategy: matrix: - python-version: [ - '3.10', - '3.11', - '3.12' - ] + python-version: ['3.10', '3.11', '3.12'] max-parallel: 8 fail-fast: false steps: - uses: actions/checkout@v3 - + - uses: proudust/gh-describe@v2 + # id needed to generate the outputs + id: ghd + - name: Check outputs + run: | + echo "describe : ${{ steps.ghd.outputs.describe }}" + echo "tag : ${{ steps.ghd.outputs.tag }}" + echo "distance : ${{ steps.ghd.outputs.distance }}" + echo "sha : ${{ steps.ghd.outputs.sha }}" + echo "short-sha : ${{ steps.ghd.outputs.short-sha }}" - name: Set up Python ${{ matrix.python-version }} uses: 
actions/setup-python@v4 with: @@ -31,10 +35,12 @@ jobs: - name: Install dependencies run: | + export RO_JSON_GIT_DESCRIBE=${{ steps.ghd.outputs.describe }} python -m pip install --upgrade pip - pip install pytest pytz pathlib numpy pandas + pip install setuptools pytest pytz pathlib numpy pandas - name: Run tests run: | + export RO_JSON_GIT_DESCRIBE=${{ steps.ghd.outputs.describe }} python --version pytest -v --strict diff --git a/ro_json/__init__.py b/ro_json/__init__.py index af85f1b..5a0cff2 100644 --- a/ro_json/__init__.py +++ b/ro_json/__init__.py @@ -13,10 +13,6 @@ numeric_types_hook, ClassInstanceHook, json_set_hook, pandas_hook, nopandas_hook, json_numpy_obj_hook, \ json_nonumpy_obj_hook, pathlib_hook, json_bytes_hook from .nonp import dumps, dump, loads, load -from ._version import VERSION - -__version__ = VERSION - try: # find_module takes just as long as importing, so no optimization possible @@ -30,3 +26,4 @@ # from .np_utils import encode_scalars_inplace +from ._version import __version__ diff --git a/ro_json/_static_version.py b/ro_json/_static_version.py new file mode 100644 index 0000000..5557f9b --- /dev/null +++ b/ro_json/_static_version.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# This file is part of 'miniver': https://github.com/jbweston/miniver +# +# This file will be overwritten by setup.py when a source or binary +# distribution is made. The magic value "__use_git__" is interpreted by +# version.py. + +version = "__use_git__" + +# These values are only set if the distribution was created with 'git archive' +refnames = "$Format:%D$" +git_hash = "$Format:%h$" diff --git a/ro_json/_version.py b/ro_json/_version.py index 285aa97..633703d 100644 --- a/ro_json/_version.py +++ b/ro_json/_version.py @@ -1,2 +1,194 @@ -VERSION = '1.0.0' +# -*- coding: utf-8 -*- +# This file is part of 'miniver': https://github.com/jbweston/miniver +# +from collections import namedtuple +import os +import subprocess +from setuptools.command.build_py import build_py as build_py_orig +from setuptools.command.sdist import sdist as sdist_orig + +Version = namedtuple("Version", ("release", "dev", "labels")) + +# No public API +__all__ = [] + +package_root = os.path.dirname(os.path.realpath(__file__)) +package_name = os.path.basename(package_root) + +STATIC_VERSION_FILE = "_static_version.py" + + +def get_version(version_file=STATIC_VERSION_FILE): + version_info = get_static_version_info(version_file) + version = version_info["version"] + if version == "__use_git__": + version = get_version_from_git() + if not version: + version = get_version_from_git_archive(version_info) + if not version: + version = Version("unknown", None, None) + return pep440_format(version) + else: + return version + + +def get_static_version_info(version_file=STATIC_VERSION_FILE): + version_info = {} + with open(os.path.join(package_root, version_file), "rb") as f: + exec(f.read(), {}, version_info) + return version_info + + +def version_is_from_git(version_file=STATIC_VERSION_FILE): + return get_static_version_info(version_file)["version"] == "__use_git__" + + +def pep440_format(version_info): + release, dev, labels = version_info + + version_parts = [release] + if dev: + if release.endswith("-dev") or release.endswith(".dev"): + version_parts.append(dev) + else: # prefer PEP440 over strict adhesion to semver + version_parts.append(".post{}".format(dev)) + + if labels: + version_parts.append("+") + version_parts.append(".".join(labels)) + + return "".join(version_parts) + + +def get_version_from_git(): + # git describe 
--first-parent does not take into account tags from branches + # that were merged-in. The '--long' flag gets us the 'dev' version and + # git hash, '--always' returns the git hash even if there are no tags. + for opts in [["--first-parent"], []]: + try: + p = subprocess.Popen( + ["git", "describe", "--tags", "--long", "--always"] + opts, + cwd=package_root, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + except OSError: + return + if p.wait() == 0: + break + else: + return + + if os.environ.get("RO_JSON_GIT_DESCRIBE", None): + git_describe = os.environ["RO_JSON_GIT_DESCRIBE"] + else: + git_describe = p.communicate()[0].decode() + + description = ( + git_describe + .strip("v") # Tags can have a leading 'v', but the version should not + .rstrip("\n") + .rsplit("-", 2) # Split the latest tag, commits since tag, and hash + ) + + try: + release, dev, git = description + except ValueError: # No tags, only the git hash + # prepend 'g' to match with format returned by 'git describe' + git = "g{}".format(*description) + release = "unknown" + dev = None + + labels = [] + if dev == "0": + dev = None + else: + labels.append(git) + + try: + p = subprocess.Popen(["git", "diff", "--quiet"], cwd=package_root) + except OSError: + labels.append("confused") # This should never happen. + else: + if p.wait() == 1: + labels.append("dirty") + + return Version(release, dev, labels) + + +# TODO: change this logic when there is a git pretty-format +# that gives the same output as 'git describe'. +# Currently we can only tell the tag the current commit is +# pointing to, or its hash (with no version info) +# if it is not tagged. +def get_version_from_git_archive(version_info): + try: + refnames = version_info["refnames"] + git_hash = version_info["git_hash"] + except KeyError: + # These fields are not present if we are running from an sdist. + # Execution should never reach here, though + return None + + if git_hash.startswith("$Format") or refnames.startswith("$Format"): + # variables not expanded during 'git archive' + return None + + VTAG = "tag: v" + refs = set(r.strip() for r in refnames.split(",")) + version_tags = set(r[len(VTAG) :] for r in refs if r.startswith(VTAG)) + if version_tags: + release, *_ = sorted(version_tags) # prefer e.g. "2.0" over "2.0rc1" + return Version(release, dev=None, labels=None) + else: + return Version("unknown", dev=None, labels=["g{}".format(git_hash)]) + + +__version__ = get_version() + + +# The following section defines a 'get_cmdclass' function +# that can be used from setup.py. The '__version__' module +# global is used (but not modified). + + +def _write_version(fname): + # This could be a hard link, so try to delete it first. Is there any way + # to do this atomically together with opening? 
+ try: + os.remove(fname) + except OSError: + pass + with open(fname, "w") as f: + f.write( + "# This file has been created by setup.py.\n" + "version = '{}'\n".format(__version__) + ) + + +def get_cmdclass(pkg_source_path): + class _build_py(build_py_orig): + def run(self): + super().run() + + src_marker = "".join(["src", os.path.sep]) + + if pkg_source_path.startswith(src_marker): + path = pkg_source_path[len(src_marker):] + else: + path = pkg_source_path + _write_version( + os.path.join( + self.build_lib, path, STATIC_VERSION_FILE + ) + ) + + class _sdist(sdist_orig): + def make_release_tree(self, base_dir, files): + super().make_release_tree(base_dir, files) + _write_version( + os.path.join(base_dir, pkg_source_path, STATIC_VERSION_FILE) + ) + + return dict(sdist=_sdist, build_py=_build_py) diff --git a/setup.py b/setup.py index 19ddbb4..fde9662 100644 --- a/setup.py +++ b/setup.py @@ -3,11 +3,26 @@ with open('README.md', 'r', encoding='utf-8') as fh: readme = fh.read() -from ro_json._version import VERSION +def get_version_and_cmdclass(pkg_path): + """Load version.py module without importing the whole package. + + Template code from miniver + """ + import os + from importlib.util import module_from_spec, spec_from_file_location + + spec = spec_from_file_location("version", os.path.join(pkg_path, "_version.py")) + module = module_from_spec(spec) + spec.loader.exec_module(module) + return module.__version__, module.get_cmdclass(pkg_path) + + +version, cmdclass = get_version_and_cmdclass(r"ro_json") setup( name='ro-json', - version=VERSION, + version=version, + cmdclass=cmdclass, description='Extra features for Python\'s JSON: comments, order, numpy, ' 'pandas, datetimes, and many more! Simple but customizable.', long_description=readme, @@ -22,7 +37,6 @@ 'Natural Language :: English', 'License :: OSI Approved :: BSD License', 'Operating System :: OS Independent', - 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', diff --git a/tests/test_meta.py b/tests/test_meta.py index 1f76fce..1c6043d 100644 --- a/tests/test_meta.py +++ b/tests/test_meta.py @@ -1,7 +1,12 @@ - +import ro_json import re def test_version(): - import ro_json - assert re.match(r'^\d+\.\d+\.\d+$', ro_json.__version__) is not None + # The version shall be compatible with + # packaging.version.Version + # and enable comparison + assert re.match(r'^\d+\.\d+\.\d+.*$', '1.2.3') is not None + assert re.match(r'^\d+\.\d+\.\d+.*$', '1.2.3.post1') is not None + assert re.match(r'^\d+\.\d+\.\d+.*$', '1.2.3.post13+g7cb3d69.dirty') is not None + assert re.match(r'^\d+\.\d+\.\d+.*$', ro_json.__version__) is not None From b056c7d31712e06eccd43235fc71ea59487b8de3 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 17 Sep 2024 19:01:57 -0400 Subject: [PATCH 12/16] Actually install things during tests --- .github/workflows/tests.yml | 5 +++++ setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 0483d0c..8f14fc8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -39,6 +39,11 @@ jobs: python -m pip install --upgrade pip pip install setuptools pytest pytz pathlib numpy pandas + - name: Install package + run: | + export RO_JSON_GIT_DESCRIBE=${{ steps.ghd.outputs.describe }} + python -m pip install . 
-vvv + - name: Run tests run: | export RO_JSON_GIT_DESCRIBE=${{ steps.ghd.outputs.describe }} diff --git a/setup.cfg b/setup.cfg index befa856..9b1d62f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bdist_wheel] universal = 1 [metadata] -description-file = README.rst +description_file = README.md license_file = LICENSE.txt diff --git a/setup.py b/setup.py index fde9662..5abd125 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ def get_version_and_cmdclass(pkg_path): return module.__version__, module.get_cmdclass(pkg_path) -version, cmdclass = get_version_and_cmdclass(r"ro_json") +version, cmdclass = get_version_and_cmdclass("ro_json") setup( name='ro-json', From 78d01a29005b4230dda91a3859cf47c3ab25f9da Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 17 Sep 2024 19:39:47 -0400 Subject: [PATCH 13/16] Not universal wheel --- setup.cfg | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 setup.cfg diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 9b1d62f..0000000 --- a/setup.cfg +++ /dev/null @@ -1,5 +0,0 @@ -[bdist_wheel] -universal = 1 -[metadata] -description_file = README.md -license_file = LICENSE.txt From 70f237bc87f86960d2f0d8989b1ccdc9db6a8d84 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Tue, 17 Sep 2024 19:43:22 -0400 Subject: [PATCH 14/16] Add comment to pypi creation --- .github/workflows/pypi.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pypi.yaml b/.github/workflows/pypi.yaml index 1a4a29d..33906e9 100644 --- a/.github/workflows/pypi.yaml +++ b/.github/workflows/pypi.yaml @@ -18,6 +18,7 @@ jobs: steps: - uses: actions/checkout@v3 with: + # https://github.com/actions/checkout#fetch-all-history-for-all-tags-and-branches fetch-depth: 0 - name: Set up Python From 491fea44201b8a705dbd94d932508625ad0c2aa5 Mon Sep 17 00:00:00 2001 From: Clay Dugo Date: Wed, 18 Sep 2024 10:36:18 -0400 Subject: [PATCH 15/16] Dummy commit --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 4c3be4e..9464586 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ > [!NOTE] + >The primary reason for this fork is to enable full round-trip serialization and deserialization of NumPy scalars and 0-dimensional arrays to JSON and back. This feature is essential for applications that require precise data preservation when working with NumPy data types. Despite contributing this enhancement to the original project (see [Pull Request #99](https://github.com/mverleg/pyjson_tricks/pull/99)), there was a difference in opinion with the maintainer regarding its inclusion. As a result, this fork aims to continue development with this functionality integrated. 
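The round-trip behaviour described in the note above can be sketched with a short example. This is a minimal illustration rather than part of the patch series: it assumes ro_json and numpy are importable, the sample values are arbitrary, and only the public dumps/loads API is exercised.

import numpy as np
from ro_json import dumps, loads

# A numpy scalar, a 0-dimensional array, and a datetime64 scalar.
scalar = np.float64(3.5)
zero_dim = np.array(3.5)
stamp = np.datetime64('2024-01-01T12:00:00')

encoded = dumps({'scalar': scalar, 'zero_dim': zero_dim, 'stamp': stamp})
decoded = loads(encoded)

# Types survive the round trip instead of degrading to plain Python values.
assert isinstance(decoded['scalar'], np.float64)
assert decoded['zero_dim'].ndim == 0
assert decoded['stamp'] == stamp

If these assertions hold, numpy scalars and 0-dimensional arrays are restored with their original types rather than being collapsed to plain Python floats or strings.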
From a865991ebfd56af2dec876624fa628d9e0ba3362 Mon Sep 17 00:00:00 2001
From: Mark Harfouche
Date: Wed, 18 Sep 2024 17:21:33 -0400
Subject: [PATCH 16/16] Test without numpy and other optional dependencies

---
 .github/workflows/tests.yml | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 8f14fc8..d9fde03 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -13,6 +13,13 @@ jobs:
     strategy:
       matrix:
         python-version: ['3.10', '3.11', '3.12']
+        include:
+          - python-version: '3.10'
+            with-numpy: true
+          - python-version: '3.11'
+            with-numpy: false
+          - python-version: '3.12'
+            with-numpy: true
       max-parallel: 8
       fail-fast: false
 
@@ -37,7 +44,10 @@ jobs:
       run: |
         export RO_JSON_GIT_DESCRIBE=${{ steps.ghd.outputs.describe }}
         python -m pip install --upgrade pip
-        pip install setuptools pytest pytz pathlib numpy pandas
+        pip install setuptools pytest pathlib
+        if [ "${{ matrix.with-numpy }}" = "true" ]; then
+          pip install pytz numpy pandas
+        fi
 
     - name: Install package
       run: |