From 3d69ea04ce01d19ea761124c730a996a53c25e49 Mon Sep 17 00:00:00 2001 From: Vinit Kumar Date: Sat, 28 Aug 2021 21:01:28 +0530 Subject: [PATCH] Feat/fork and update dict2xml (#75) * feat: first commit with dict2xml fork * feat: some cleanup * chore: format with black * feat: remove legacy code for python2 support * chore: cleanup some more code * feat: some more pylint improvements * feat: make code more idiomatic python * chore: some more cleanup * feat: some more cleanup * feat: add f-strings for easier formatting * feat: remove xmltodict as it is a test only dep * chore: plan for removing requests from dep as well * fix: make flake8 pass * feat: make flake8 consult setup.cfg * feat: test with 3.10.0-rc.1 (#71) * feat: test with python3.10 b1 * feat: add support for python3.10 beta versio https://github.com/jaraco/keyring/commit/a5f055a705b0098f31b7b608aea09605eacd9751 - Github Issue: Authored-by: Vinit Kumar Signed-off-by: Vinit Kumar * fix: some type warnings, thanks to the latest video @asottile * feat: directly give the 3.10.0-beta3 * feat: add 3.10 beta 4 support * feat: only support latest ubuntu, macos and windows for now * feat: update classifiers * Update to Python 3.10 rc1 * Update pythonpackage.yml * fix: allow exit-zero for flake8 config * :memo: docs: update the readme to reflect fork of dicttoxml We are upgrading dicttoxml by forking and adding it to the code * :bookmark: v3.7.0beta1 Release --- .github/workflows/pythonpackage.yml | 9 +- README.rst | 6 + json2xml/__init__.py | 2 +- json2xml/dicttoxml.py | 392 ++++++++++++++++++++++++++++ json2xml/json2xml.py | 14 +- json2xml/utils.py | 9 +- requirements_prod.txt | 2 - setup.cfg | 10 + setup.py | 6 +- tests/test_json2xml.py | 22 +- 10 files changed, 452 insertions(+), 20 deletions(-) create mode 100755 json2xml/dicttoxml.py diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index 0f066fc..b0395e1 100644 --- a/.github/workflows/pythonpackage.yml +++ 
b/.github/workflows/pythonpackage.yml @@ -8,11 +8,9 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.6, 3.7, 3.8, 3.9, pypy3] + python-version: [3.6, 3.7, 3.8, 3.9, pypy3, '3.10.0-rc.1'] os: [ ubuntu-20.04, - ubuntu-18.04, - ubuntu-16.04, macOS-latest, windows-latest, ] @@ -32,10 +30,9 @@ jobs: - name: Lint with flake8 run: | pip install flake8 + pip install xmltodict==0.12.0 # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 json2xml/ --exit-zero - name: Test with pytest run: | python setup.py test diff --git a/README.rst b/README.rst index e33edcf..e09f721 100644 --- a/README.rst +++ b/README.rst @@ -30,6 +30,12 @@ Simple Python Library to convert JSON to XML * Documentation: https://json2xml.readthedocs.io. +Update +------ + +The dicttoxml project has been forked and integrated into this project. This helped with cleaning up the code +and making improvements. The goal is to remove all the dependencies from this project. 
+ Features -------- diff --git a/json2xml/__init__.py b/json2xml/__init__.py index afa8f47..1eefda4 100644 --- a/json2xml/__init__.py +++ b/json2xml/__init__.py @@ -4,7 +4,7 @@ __author__ = """Vinit Kumar""" __email__ = "mail@vinitkumar.me" -__version__ = "3.6.0" +__version__ = "3.7.0b1" # from .utils import readfromurl, readfromstring, readfromjson diff --git a/json2xml/dicttoxml.py b/json2xml/dicttoxml.py new file mode 100755 index 0000000..4bf99eb --- /dev/null +++ b/json2xml/dicttoxml.py @@ -0,0 +1,392 @@ +#!/usr/bin/env python +# coding: utf-8 + +import collections +import logging +import numbers +from random import randint +from xml.dom.minidom import parseString + + +from typing import Dict, Any + +LOG = logging.getLogger("dicttoxml") + +""" +Converts a Python dictionary or other native data type into a valid XML string. + +Supports item (`int`, `float`, `long`, `decimal.Decimal`, `bool`, `str`, `unicode`, `datetime`, `none` and other + number-like objects) and collection (`list`, `set`, `tuple` and `dict`, as well as iterable and + dict-like objects) data types, with arbitrary nesting for the collections. + Items with a `datetime` type are converted to ISO format strings. + Items with a `None` type become empty XML elements. +This module works with both Python 2 and 3. 
+""" + + +ids = [] # initialize list of unique ids + + +def make_id(element, start=100000, end=999999): + """Returns a random integer""" + return "%s_%s" % (element, randint(start, end)) + + +def get_unique_id(element): + """Returns a unique id for a given element""" + this_id = make_id(element) + dup = True + while dup: + if this_id not in ids: + dup = False + ids.append(this_id) + else: + this_id = make_id(element) + return ids[-1] + + +def get_xml_type(val): + """Returns the data type for the xml type attribute""" + if type(val).__name__ in ("str", "unicode"): + return "str" + if type(val).__name__ in ("int", "long"): + return "int" + if type(val).__name__ == "float": + return "float" + if type(val).__name__ == "bool": + return "bool" + if isinstance(val, numbers.Number): + return "number" + if type(val).__name__ == "NoneType": + return "null" + if isinstance(val, dict): + return "dict" + if isinstance(val, collections.Iterable): + return "list" + return type(val).__name__ + + +def escape_xml(s: str) -> str: + + if isinstance(s, str): + s = str(s) # avoid UnicodeDecodeError + s = s.replace("&", "&") + s = s.replace('"', """) + s = s.replace("'", "'") + s = s.replace("<", "<") + s = s.replace(">", ">") + return s + + +def make_attrstring(attr): + """Returns an attribute string in the form key="val" """ + attrstring = " ".join(['%s="%s"' % (k, v) for k, v in attr.items()]) + return "%s%s" % (" " if attrstring != "" else "", attrstring) + + +def key_is_valid_xml(key): + """Checks that a key is a valid XML name""" + LOG.info('Inside key_is_valid_xml(). Testing "%s"' % (str(key))) + test_xml = '<%s>foo' % (key, key) + try: + parseString(test_xml) + return True + except Exception: # minidom does not implement exceptions well + return False + + +def make_valid_xml_name(key, attr: Dict[str, Any]): + """Tests an XML name and fixes it if invalid""" + LOG.info( + 'Inside make_valid_xml_name(). 
Testing key "%s" with attr "%s"' + % (str(key), str(attr)) + ) + key = escape_xml(key) + # nothing happens at escape_xml if attr is not a string, we don't + # need to pass it to the method at all. + # attr = escape_xml(attr) + + # pass through if key is already valid + if key_is_valid_xml(key): + return key, attr + + # prepend a lowercase n if the key is numeric + if key.isdigit(): + return "n%s" % (key), attr + + # replace spaces with underscores if that fixes the problem + if key_is_valid_xml(key.replace(" ", "_")): + return key.replace(" ", "_"), attr + + # key is still invalid - move it into a name attribute + attr["name"] = key + key = "key" + return key, attr + + +def wrap_cdata(s: str) -> str: + """Wraps a string into CDATA sections""" + s = str(s).replace("]]>", "]]]]>") + return "" + + +def default_item_func(parent): + return "item" + + +def convert(obj, ids, attr_type, item_func, cdata, parent="root"): + """Routes the elements of an object to the right function to convert them + based on their data type""" + + LOG.info( + 'Inside convert(). 
obj type is: "%s", obj="%s"' % (type(obj).__name__, str(obj)) + ) + + item_name = item_func(parent) + + if isinstance(obj, numbers.Number) or isinstance(obj, str): + return convert_kv(item_name, obj, attr_type, cdata) + + if hasattr(obj, "isoformat"): + return convert_kv(item_name, obj.isoformat(), attr_type, cdata) + + if isinstance(obj, bool): + return convert_bool(item_name, obj, attr_type, cdata) + + if obj is None: + return convert_none(item_name, "", attr_type, cdata) + + if isinstance(obj, dict): + return convert_dict(obj, ids, parent, attr_type, item_func, cdata) + + if isinstance(obj, collections.Iterable): + return convert_list(obj, ids, parent, attr_type, item_func, cdata) + + raise TypeError("Unsupported data type: %s (%s)" % (obj, type(obj).__name__)) + + +def convert_dict(obj, ids, parent, attr_type, item_func, cdata): + """Converts a dict into an XML string.""" + LOG.info( + 'Inside convert_dict(): obj type is: "%s", obj="%s"' + % (type(obj).__name__, str(obj)) + ) + output = [] + addline = output.append + + for key, val in obj.items(): + LOG.info( + 'Looping inside convert_dict(): key="%s", val="%s", type(val)="%s"' + % (str(key), str(val), type(val).__name__) + ) + + attr = {} if not ids else {"id": "%s" % (get_unique_id(parent))} + + key, attr = make_valid_xml_name(key, attr) + + if isinstance(val, numbers.Number) or isinstance(val, str): + addline(convert_kv(key, val, attr_type, attr, cdata)) + + elif hasattr(val, "isoformat"): # datetime + addline(convert_kv(key, val.isoformat(), attr_type, attr, cdata)) + + elif isinstance(val, bool): + addline(convert_bool(key, val, attr_type, attr, cdata)) + + elif isinstance(val, dict): + if attr_type: + attr["type"] = get_xml_type(val) + addline( + "<%s%s>%s" + % ( + key, + make_attrstring(attr), + convert_dict(val, ids, key, attr_type, item_func, cdata), + key, + ) + ) + + elif isinstance(val, collections.Iterable): + if attr_type: + attr["type"] = get_xml_type(val) + addline( + "<%s%s>%s" + % ( + key, + 
make_attrstring(attr), + convert_list(val, ids, key, attr_type, item_func, cdata), + key, + ) + ) + + elif val is None: + addline(convert_none(key, val, attr_type, attr, cdata)) + + else: + raise TypeError( + "Unsupported data type: %s (%s)" % (val, type(val).__name__) + ) + + return "".join(output) + + +def convert_list(items, ids, parent, attr_type, item_func, cdata): + """Converts a list into an XML string.""" + LOG.info("Inside convert_list()") + output = [] + addline = output.append + + item_name = item_func(parent) + this_id = None + if ids: + this_id = get_unique_id(parent) + + for i, item in enumerate(items): + LOG.info( + 'Looping inside convert_list(): item="%s", item_name="%s", type="%s"' + % (str(item), item_name, type(item).__name__) + ) + attr = {} if not ids else {"id": "%s_%s" % (this_id, i + 1)} + if isinstance(item, numbers.Number) or isinstance(item, str): + addline(convert_kv(item_name, item, attr_type, attr, cdata)) + + elif hasattr(item, "isoformat"): # datetime + addline(convert_kv(item_name, item.isoformat(), attr_type, attr, cdata)) + + elif isinstance(item, bool): + addline(convert_bool(item_name, item, attr_type, attr, cdata)) + + elif isinstance(item, dict): + if not attr_type: + addline( + "<%s>%s" + % ( + item_name, + convert_dict(item, ids, parent, attr_type, item_func, cdata), + item_name, + ) + ) + else: + addline( + '<%s type="dict">%s' + % ( + item_name, + convert_dict(item, ids, parent, attr_type, item_func, cdata), + item_name, + ) + ) + + elif isinstance(item, collections.Iterable): + if not attr_type: + addline( + "<%s %s>%s" + % ( + item_name, + make_attrstring(attr), + convert_list(item, ids, item_name, attr_type, item_func, cdata), + item_name, + ) + ) + else: + addline( + '<%s type="list"%s>%s' + % ( + item_name, + make_attrstring(attr), + convert_list(item, ids, item_name, attr_type, item_func, cdata), + item_name, + ) + ) + + elif item is None: + addline(convert_none(item_name, None, attr_type, attr, cdata)) + + else: + 
raise TypeError( + "Unsupported data type: %s (%s)" % (item, type(item).__name__) + ) + return "".join(output) + + +def convert_kv(key, val, attr_type, attr={}, cdata: bool = False): + """Converts a number or string into an XML element""" + LOG.info( + 'Inside convert_kv(): key="%s", val="%s", type(val) is: "%s"' + % (str(key), str(val), type(val).__name__) + ) + + key, attr = make_valid_xml_name(key, attr) + + if attr_type: + attr["type"] = get_xml_type(val) + attrstring = make_attrstring(attr) + return "<%s%s>%s" % ( + key, + attrstring, + wrap_cdata(val) if cdata else escape_xml(val), + key, + ) + + +def convert_bool(key, val, attr_type, attr={}, cdata=False): + """Converts a boolean into an XML element""" + LOG.info( + 'Inside convert_bool(): key="%s", val="%s", type(val) is: "%s"' + % (str(key), str(val), type(val).__name__) + ) + + key, attr = make_valid_xml_name(key, attr) + + if attr_type: + attr["type"] = get_xml_type(val) + attrstring = make_attrstring(attr) + return "<%s%s>%s" % (key, attrstring, str(val).lower(), key) + + +def convert_none(key, val, attr_type, attr={}, cdata=False): + """Converts a null value into an XML element""" + LOG.info('Inside convert_none(): key="%s"' % (str(key))) + + key, attr = make_valid_xml_name(key, attr) + + if attr_type: + attr["type"] = get_xml_type(val) + attrstring = make_attrstring(attr) + return "<%s%s>" % (key, attrstring, key) + + +def dicttoxml( + obj, + root: bool = True, + custom_root="root", + ids=False, + attr_type=True, + item_func=default_item_func, + cdata=False, +): + """Converts a python object into XML. + Arguments: + - root specifies whether the output is wrapped in an XML root element + Default is True + - custom_root allows you to specify a custom root element. + Default is 'root' + - ids specifies whether elements get unique ids. + Default is False + - attr_type specifies whether elements get a data type attribute. 
+ Default is True + - item_func specifies what function should generate the element name for + items in a list. + Default is 'item' + - cdata specifies whether string values should be wrapped in CDATA sections. + Default is False + """ + LOG.info( + 'Inside dicttoxml(): type(obj) is: "%s", obj="%s"' + % (type(obj).__name__, str(obj)) + ) + output = [] + output.append('') + output.append( + f"<{custom_root}>{convert(obj, ids, attr_type, item_func, cdata, parent=custom_root)}" + ) + return "".join(output).encode("utf-8") diff --git a/json2xml/json2xml.py b/json2xml/json2xml.py index d556249..3809e83 100644 --- a/json2xml/json2xml.py +++ b/json2xml/json2xml.py @@ -1,12 +1,14 @@ # -*- coding: utf-8 -*- +from typing import Optional, Any from xml.dom.minidom import parseString -import dicttoxml +from json2xml import dicttoxml class Json2xml: def __init__( self, data: str, wrapper: str = "all", + root: bool = True, pretty: bool = True, attr_type: bool = True ): @@ -14,13 +16,19 @@ def __init__( self.pretty = pretty self.wrapper = wrapper self.attr_type = attr_type + self.root = root - def to_xml(self): + def to_xml(self) -> Optional[Any]: """ Convert to xml using dicttoxml.dicttoxml and then pretty print it. 
""" if self.data: - xml_data = dicttoxml.dicttoxml(self.data, custom_root=self.wrapper, attr_type=self.attr_type) + xml_data = dicttoxml.dicttoxml( + self.data, + root=self.root, + custom_root=self.wrapper, + attr_type=self.attr_type + ) if self.pretty: return parseString(xml_data).toprettyxml() return xml_data diff --git a/json2xml/utils.py b/json2xml/utils.py index a727b4f..ecb2ac9 100644 --- a/json2xml/utils.py +++ b/json2xml/utils.py @@ -1,5 +1,6 @@ """Utils methods to convert XML data to dict from various sources""" import json +from typing import Dict, Optional import requests @@ -16,7 +17,7 @@ class StringReadError(Exception): -def readfromjson(filename: str) -> dict: +def readfromjson(filename: str) -> Dict[str, str]: """ Reads a json string and emits json string """ @@ -33,10 +34,12 @@ def readfromjson(filename: str) -> dict: raise JSONReadError("Invalid JSON File") -def readfromurl(url: str, params: dict = None) -> dict: +def readfromurl(url: str, params: Optional[Dict[str, str]] = None) -> Dict[str, str]: """ Loads json from an URL over the internets """ + # TODO: See if we can remove requests too from the deps too. Then, we will become + # zero deps. 
Reference link here: https://bit.ly/3gzICjU response = requests.get(url, params=params) if response.status_code == 200: data = response.json() @@ -44,7 +47,7 @@ def readfromurl(url: str, params: dict = None) -> dict: raise URLReadError("URL is not returning correct response") -def readfromstring(jsondata: str) -> dict: +def readfromstring(jsondata: str) -> Dict[str, str]: """ Loads json from string """ diff --git a/requirements_prod.txt b/requirements_prod.txt index 6f44e7a..d1dd541 100644 --- a/requirements_prod.txt +++ b/requirements_prod.txt @@ -1,4 +1,2 @@ -dicttoxml==1.7.4 requests>=2.20.0 -xmltodict==0.11.0 diff --git a/setup.cfg b/setup.cfg index be4e117..8ffa5d3 100644 --- a/setup.cfg +++ b/setup.cfg @@ -16,7 +16,17 @@ universal = 1 [flake8] exclude = docs +max-line-length=120 [aliases] # Define setup.py command aliases here +[mypy] +check_untyped_defs = true +disallow_any_generics = true +disallow_incomplete_defs = true +disallow_untyped_defs = true +no_implicit_optional = true +warn_redundant_casts = true +warn_unused_ignores = true +ignore_missing_imports = true diff --git a/setup.py b/setup.py index f40a868..a1a3d94 100644 --- a/setup.py +++ b/setup.py @@ -22,13 +22,15 @@ author="Vinit Kumar", author_email="mail@vinitkumar.me", classifiers=[ - "Development Status :: 2 - Pre-Alpha", + "Development Status :: 6 - Mature", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Natural Language :: English", - "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", ], description="Simple Python Library to convert JSON to XML", install_requires=requirements, diff --git a/tests/test_json2xml.py b/tests/test_json2xml.py index 81d2eb2..3715311 100644 --- a/tests/test_json2xml.py +++ b/tests/test_json2xml.py @@ -24,8 +24,13 @@ def 
tearDown(self): def test_read_from_json(self): """Test something.""" - data = readfromjson("examples/licht.json") - assert type(data) is dict + data = readfromjson("examples/bigexample.json") + if type(data) == 'list': + # it's a JSON array, so we just take the first element and check its type + assert type(data[0]) is dict + else: + data = readfromjson("examples/licht.json") + assert type(data) is dict def test_read_from_invalid_json(self): """Test something.""" @@ -67,7 +72,18 @@ def test_custom_wrapper_and_indent(self): data = readfromstring( '{"login":"mojombo","id":1,"avatar_url":"https://avatars0.githubusercontent.com/u/1?v=4"}' ) - xmldata = json2xml.Json2xml(data, wrapper="test", pretty=True).to_xml() + xmldata = json2xml.Json2xml(data, root=False, wrapper="test", pretty=False).to_xml() + old_dict = xmltodict.parse(xmldata) + # test must be present, since it is the wrapper + assert "test" in old_dict.keys() + # reverse test: a wrapper called random won't be present + assert "random" not in old_dict.keys() + + def test_no_wrapper_and_indent(self): + data = readfromstring( + '{"login":"mojombo","id":1,"avatar_url":"https://avatars0.githubusercontent.com/u/1?v=4"}' + ) + xmldata = json2xml.Json2xml(data, root=False, wrapper="test", pretty=False).to_xml() old_dict = xmltodict.parse(xmldata) # test must be present, snce it is the wrpper assert "test" in old_dict.keys()