From bfc4eb7bd2fb094875cf2132189b85abf39bdde1 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 19:49:12 +0200 Subject: [PATCH 01/18] Add .eggs to .gitignore --- .gitignore | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index fd2458b11..57fa4c488 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,9 @@ -*.py[cod] __pycache__ - +.eggs *.cache *.egg-info *.pdf +*.py[cod] # Development and build files .coverage From 22b60db933d31fb001b0b376722901b1843268af Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 19:51:16 +0200 Subject: [PATCH 02/18] Rename sdmx.writer to sdmx.writer.pandas --- sdmx/__init__.py | 2 +- sdmx/writer/__init__.py | 3 +++ sdmx/{writer.py => writer/pandas.py} | 0 3 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 sdmx/writer/__init__.py rename sdmx/{writer.py => writer/pandas.py} (100%) diff --git a/sdmx/__init__.py b/sdmx/__init__.py index d2ae4542a..e89ede2dc 100644 --- a/sdmx/__init__.py +++ b/sdmx/__init__.py @@ -3,7 +3,7 @@ from sdmx.api import Request, read_sdmx, read_url from sdmx.source import add_source, list_sources from sdmx.util import Resource -from sdmx.writer import write as to_pandas +from sdmx.writer import to_pandas import logging __all__ = [ diff --git a/sdmx/writer/__init__.py b/sdmx/writer/__init__.py new file mode 100644 index 000000000..7ca93b93e --- /dev/null +++ b/sdmx/writer/__init__.py @@ -0,0 +1,3 @@ +from .pandas import write as to_pandas + +__all__ = ['to_pandas'] diff --git a/sdmx/writer.py b/sdmx/writer/pandas.py similarity index 100% rename from sdmx/writer.py rename to sdmx/writer/pandas.py From 548ae19fcf2ad0238649374be7f1dc4c0e7a6c36 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 20:54:43 +0200 Subject: [PATCH 03/18] Move reader, writer tests to subfolders mirroring package layout --- sdmx/tests/{test_reader_json.py => reader/test_json.py} | 5 ++--- sdmx/tests/{ => 
reader}/test_reader_xml.py | 5 ++--- sdmx/tests/{test_writer.py => writer/test_pandas.py} | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) rename sdmx/tests/{test_reader_json.py => reader/test_json.py} (88%) rename sdmx/tests/{ => reader}/test_reader_xml.py (98%) rename sdmx/tests/{test_writer.py => writer/test_pandas.py} (99%) diff --git a/sdmx/tests/test_reader_json.py b/sdmx/tests/reader/test_json.py similarity index 88% rename from sdmx/tests/test_reader_json.py rename to sdmx/tests/reader/test_json.py index 9c9db7b9e..261fa15a1 100644 --- a/sdmx/tests/test_reader_json.py +++ b/sdmx/tests/reader/test_json.py @@ -1,7 +1,6 @@ -import sdmx import pytest - -from .data import specimen, test_files +import sdmx +from sdmx.tests.data import specimen, test_files @pytest.mark.parametrize('path', **test_files(format='json')) diff --git a/sdmx/tests/test_reader_xml.py b/sdmx/tests/reader/test_reader_xml.py similarity index 98% rename from sdmx/tests/test_reader_xml.py rename to sdmx/tests/reader/test_reader_xml.py index 2c822d186..c7936ceef 100644 --- a/sdmx/tests/test_reader_xml.py +++ b/sdmx/tests/reader/test_reader_xml.py @@ -1,12 +1,11 @@ from lxml.etree import Element +import pytest import sdmx from sdmx.model import ( Facet, FacetType, FacetValueType, ) from sdmx.reader.sdmxml import XMLParseError, Reader -import pytest - -from .data import specimen, test_files +from sdmx.tests.data import specimen, test_files # Read example data files diff --git a/sdmx/tests/test_writer.py b/sdmx/tests/writer/test_pandas.py similarity index 99% rename from sdmx/tests/test_writer.py rename to sdmx/tests/writer/test_pandas.py index dd928aa61..bed608894 100644 --- a/sdmx/tests/test_writer.py +++ b/sdmx/tests/writer/test_pandas.py @@ -6,8 +6,8 @@ import sdmx from sdmx.model import TimeDimension -from . 
import assert_pd_equal -from .data import expected_data, specimen, test_files +from sdmx.tests import assert_pd_equal +from sdmx.tests.data import expected_data, specimen, test_files # file name → (exception raised, exception message, comment/reason) From 0576785aff06be675cd2278ec8d361245c94ccc4 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 20:57:14 +0200 Subject: [PATCH 04/18] Add sdmx.format.xml for SDMX-ML info used by both reader and writer --- sdmx/format/xml.py | 23 +++++++++++++++++++++++ sdmx/reader/sdmxml.py | 20 +------------------- 2 files changed, 24 insertions(+), 19 deletions(-) create mode 100644 sdmx/format/xml.py diff --git a/sdmx/format/xml.py b/sdmx/format/xml.py new file mode 100644 index 000000000..29105a3be --- /dev/null +++ b/sdmx/format/xml.py @@ -0,0 +1,23 @@ +from functools import lru_cache + +from lxml.etree import QName + + +# XML Namespaces +_base_ns = 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1' +NS = { + 'com': f'{_base_ns}/common', + 'data': f'{_base_ns}/data/structurespecific', + 'str': f'{_base_ns}/structure', + 'mes': f'{_base_ns}/message', + 'gen': f'{_base_ns}/data/generic', + 'footer': f'{_base_ns}/message/footer', + 'xml': 'http://www.w3.org/XML/1998/namespace', + 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', + } + + +@lru_cache() +def qname(ns, name): + """Return a fully-qualified tag *name* in namespace *ns*.""" + return QName(NS[ns], name) diff --git a/sdmx/reader/sdmxml.py b/sdmx/reader/sdmxml.py index eee3fe5fe..a67dc63ca 100644 --- a/sdmx/reader/sdmxml.py +++ b/sdmx/reader/sdmxml.py @@ -12,6 +12,7 @@ from lxml.etree import QName, XPath from sdmx.exceptions import ParseError, XMLParseError +from sdmx.format.xml import NS, qname from sdmx.message import ( DataMessage, ErrorMessage, Footer, Header, StructureMessage, ) @@ -45,25 +46,6 @@ r'(\.(?P.*))?') -# XML namespaces -_base_ns = 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1' -NS = { - 'com': f'{_base_ns}/common', - 'data': 
f'{_base_ns}/data/structurespecific', - 'str': f'{_base_ns}/structure', - 'mes': f'{_base_ns}/message', - 'gen': f'{_base_ns}/data/generic', - 'footer': f'{_base_ns}/message/footer', - 'xml': 'http://www.w3.org/XML/1998/namespace', - 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', - } - - -def qname(ns, name): - """Return a fully-qualified tag *name* in namespace *ns*.""" - return QName(NS[ns], name) - - _TO_SNAKE_RE = re.compile('([A-Z]+)') From 1bd98a7d6ed0cecff3dd6ffd82d1c79f8e160559 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 20:59:24 +0200 Subject: [PATCH 05/18] Add sdmx.urn.make, sdmx.urn.match --- sdmx/reader/sdmxml.py | 13 +++---------- sdmx/urn.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 10 deletions(-) create mode 100644 sdmx/urn.py diff --git a/sdmx/reader/sdmxml.py b/sdmx/reader/sdmxml.py index a67dc63ca..b31f294a7 100644 --- a/sdmx/reader/sdmxml.py +++ b/sdmx/reader/sdmxml.py @@ -33,19 +33,12 @@ ) from sdmx.reader import BaseReader +import sdmx.urn log = logging.getLogger(__name__) -# Regular expression for URNs used as references -URN = re.compile(r'urn:sdmx:org\.sdmx\.infomodel' - r'\.(?P[^\.]*)' - r'\.(?P[^=]*)=((?P[^:]*):)?' - r'(?P[^\(\.]*)(\((?P[\d\.]*)\))?' - r'(\.(?P.*))?') - - _TO_SNAKE_RE = re.compile('([A-Z]+)') @@ -459,7 +452,7 @@ def _maintained(self, cls=None, id=None, urn=None, **kwargs): regular expression. 
""" if urn: - match = URN.match(urn).groupdict() + match = sdmx.urn.match(urn) cls = get_class(match['package'], match['class']) id = match['id'] @@ -1028,7 +1021,7 @@ def parse_conceptidentity(self, elem): raise ValueError(values) # URN should refer to a Concept - match = URN.match(values['urn']).groupdict() + match = sdmx.urn.match(values['urn']) if match['class'] != 'Concept': raise ValueError(values['urn']) diff --git a/sdmx/urn.py b/sdmx/urn.py new file mode 100644 index 000000000..40de54bda --- /dev/null +++ b/sdmx/urn.py @@ -0,0 +1,28 @@ +import re + +from sdmx.model import Code, Codelist + + +# Regular expression for URNs +URN = re.compile(r'urn:sdmx:org\.sdmx\.infomodel' + r'\.(?P[^\.]*)' + r'\.(?P[^=]*)=((?P[^:]*):)?' + r'(?P[^\(\.]*)(\((?P[\d\.]*)\))?' + r'(\.(?P.*))?') + +_BASE = ( + 'urn:sdmx:org.sdmx.infomodel.{package}.{obj.__class__.__name__}=' + '{obj.maintainer.id}:{obj.id}({obj.version})' +) +_PACKAGE = { + Code: 'codelist', + Codelist: 'codelist', +} + + +def make(obj): + return _BASE.format(obj=obj, package=_PACKAGE[obj.__class__]) + + +def match(string): + return URN.match(string).groupdict() From 2f22e219b0e1a6af4353d9e1a148a660f31ef9cd Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 20:59:52 +0200 Subject: [PATCH 06/18] Adjust order of types in InternationalString validation --- sdmx/model.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdmx/model.py b/sdmx/model.py index bcfefcc7e..f91a8898a 100644 --- a/sdmx/model.py +++ b/sdmx/model.py @@ -108,15 +108,15 @@ def __init__(self, value=None, **kwargs): and isinstance(value[0], str)): # 2-tuple of str is (locale, label) value = {value[0]: value[1]} + elif isinstance(value, dict): + # dict; use directly + pass elif isinstance(value, IterableABC): # Iterable of 2-tuples value = {locale: label for (locale, label) in value} elif value is None: # Keyword arguments → dict, possibly empty value = dict(kwargs) - elif isinstance(value, dict): - # 
dict; use directly - pass else: raise ValueError(value, kwargs) From 11b21e185a6875e4183f1a4c44efa357fd25e129 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 21:13:08 +0200 Subject: [PATCH 07/18] Ignore .coverage* --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 57fa4c488..a3535bb3b 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ __pycache__ *.py[cod] # Development and build files -.coverage +.coverage* .pytest_cache build coverage.xml From 16648669fe3d3e3fb019676023e6ae221bff1860 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 21:23:03 +0200 Subject: [PATCH 08/18] Move model.PACKAGE, model.get_class from reader.sdmxml --- sdmx/model.py | 29 ++++++++++++++++++++++++++++ sdmx/reader/sdmxml.py | 44 ++++++++++--------------------------------- sdmx/urn.py | 8 ++------ 3 files changed, 41 insertions(+), 40 deletions(-) diff --git a/sdmx/model.py b/sdmx/model.py index f91a8898a..5c95c7a7e 100644 --- a/sdmx/model.py +++ b/sdmx/model.py @@ -1621,3 +1621,32 @@ class ProvisionAgreement(MaintainableArtefact, ConstrainableArtefact): structure_usage: StructureUsage = None #: data_provider: DataProvider = None + + +#: The SDMX-IM defines 'packages'; these are used in URNs. 
+PACKAGE = dict() + +_PACKAGE_CLASS = { + 'base': {Agency, AgencyScheme, DataProvider}, + 'categoryscheme': {Category, Categorisation, CategoryScheme}, + 'codelist': {Code, Codelist}, + 'conceptscheme': {Concept, ConceptScheme}, + 'datastructure': {DataflowDefinition, DataStructureDefinition}, + 'registry': {ContentConstraint, ProvisionAgreement}, + } + +for package, classes in _PACKAGE_CLASS.items(): + PACKAGE.update({cls: package for cls in classes}) + + +def get_class(cls, package=None): + """Return a class object for string *cls* and *package* names.""" + if isinstance(cls, str): + if cls in 'Dataflow DataStructure': + cls += 'Definition' + cls = globals()[cls] + + if package and package != PACKAGE[cls]: + raise ValueError(f'Package {repr(package)} invalid for {cls}') + + return cls diff --git a/sdmx/reader/sdmxml.py b/sdmx/reader/sdmxml.py index b31f294a7..876863f4a 100644 --- a/sdmx/reader/sdmxml.py +++ b/sdmx/reader/sdmxml.py @@ -151,30 +151,6 @@ def to_snake(value): } -# Mappings from SDMX-ML 'package' to contained classes -PACKAGE_CLASS = { - 'base': {Agency, AgencyScheme, DataProvider}, - 'categoryscheme': {Category, Categorisation, CategoryScheme}, - 'codelist': {Code, Codelist}, - 'conceptscheme': {Concept, ConceptScheme}, - 'datastructure': {DataflowDefinition, DataStructureDefinition}, - 'registry': {ContentConstraint, ProvisionAgreement}, - } - - -def get_class(package, cls): - """Return a class object for string *cls* and *package* names.""" - if isinstance(cls, str): - if cls in 'Dataflow DataStructure': - cls += 'Definition' - cls = getattr(sdmx.model, cls) - - assert cls in PACKAGE_CLASS[package], \ - f'Package {package!r} invalid for {cls}' - - return cls - - def wrap(value): """Return *value* as a list. 
@@ -453,7 +429,7 @@ def _maintained(self, cls=None, id=None, urn=None, **kwargs): """ if urn: match = sdmx.urn.match(urn) - cls = get_class(match['package'], match['class']) + cls = sdmx.model.get_class(match['class'], match['package']) id = match['id'] # Re-add the URN to the kwargs @@ -605,21 +581,21 @@ def parse_ref(self, elem, parent=None): # Determine the class of the ref'd object try: # 'package' and 'class' attributes give the class directly - cls = get_class(attr.pop('package'), attr.pop('class')) + cls = sdmx.model.get_class(attr.pop('class'), attr.pop('package')) except KeyError: # No 'package' and 'class' attributes if parent == 'Parent': # Ref to parent of an Item in an ItemScheme; the ref'd object # has the same class as the Item - cls = getattr(sdmx.model, self._stack[-1]) + cls = sdmx.model.get_class(self._stack[-1]) elif parent in ('AttachmentGroup', 'Group'): cls = GroupDimensionDescriptor elif parent in ('Dimension', 'DimensionReference'): # References to Dimensions cls = [Dimension, TimeDimension] else: - cls = getattr(sdmx.model, parent) + cls = sdmx.model.get_class(parent) # Get or instantiate the object itself try: @@ -754,7 +730,7 @@ def parse_dataset(self, elem): self._current[(DataStructureDefinition, None)] = dsd # DataSet class, e.g. 
GenericDataSet for root XML tag 'GenericData' - DataSetClass = getattr(sdmx.model, f'{self._stack[0]}Set') + DataSetClass = sdmx.model.get_class(f'{self._stack[0]}Set') # Create the object ds = DataSetClass(structured_by=dsd) @@ -930,7 +906,7 @@ def parse_structures(self, elem): return self._parse(elem, unwrap=False) def parse_organisation(self, elem): - cls = getattr(sdmx.model, QName(elem).localname) + cls = sdmx.model.get_class(QName(elem).localname) o, values = self._named(cls, elem) o.contact = wrap(values.pop('contact', [])) assert len(values) == 0 @@ -1026,7 +1002,7 @@ def parse_conceptidentity(self, elem): raise ValueError(values['urn']) # Look up the parent ConceptScheme - cls = get_class(match['package'], 'ConceptScheme') + cls = sdmx.model.get_class('ConceptScheme', match['package']) cs = self._maintained(cls=cls, id=match['id']) # Get or create the Concept within *cs* @@ -1043,7 +1019,7 @@ def parse_constraintattachment(self, elem): return result def parse_orgscheme(self, elem): - cls = getattr(sdmx.model, QName(elem).localname) + cls = sdmx.model.get_class(QName(elem).localname) os, values = self._named(cls, elem, unwrap=False) # Get the list of organisations. The following assumes that the # *values* dict has only one item. Otherwise, the returned item will be @@ -1098,7 +1074,7 @@ def parse_componentlist(self, elem): # fixed to 'DimensionDescriptor'." 
cls_name = QName(elem).localname.replace('List', 'Descriptor') finally: - ComponentListClass = getattr(sdmx.model, cls_name) + ComponentListClass = sdmx.model.get_class(cls_name) cl = ComponentListClass( components=list(chain(*self._parse(elem, unwrap=False).values())), @@ -1116,7 +1092,7 @@ def parse_dimension(self, elem): values = self._parse(elem) # Object class: Dimension, MeasureDimension, or TimeDimension - DimensionClass = getattr(sdmx.model, QName(elem).localname) + DimensionClass = sdmx.model.get_class(QName(elem).localname) args = copy(elem.attrib) try: diff --git a/sdmx/urn.py b/sdmx/urn.py index 40de54bda..a6f7f2965 100644 --- a/sdmx/urn.py +++ b/sdmx/urn.py @@ -1,6 +1,6 @@ import re -from sdmx.model import Code, Codelist +from sdmx.model import PACKAGE # Regular expression for URNs @@ -14,14 +14,10 @@ 'urn:sdmx:org.sdmx.infomodel.{package}.{obj.__class__.__name__}=' '{obj.maintainer.id}:{obj.id}({obj.version})' ) -_PACKAGE = { - Code: 'codelist', - Codelist: 'codelist', -} def make(obj): - return _BASE.format(obj=obj, package=_PACKAGE[obj.__class__]) + return _BASE.format(obj=obj, package=PACKAGE[obj.__class__]) def match(string): From 0eefbb84a5b09e5777db15dbea0e1eddebad561f Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 21:32:46 +0200 Subject: [PATCH 09/18] Add sdmx.writer.xml and test --- sdmx/__init__.py | 3 +- sdmx/tests/writer/test_writer_xml.py | 34 +++++++++++++++ sdmx/writer/__init__.py | 6 ++- sdmx/writer/xml.py | 63 ++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 2 deletions(-) create mode 100644 sdmx/tests/writer/test_writer_xml.py create mode 100644 sdmx/writer/xml.py diff --git a/sdmx/__init__.py b/sdmx/__init__.py index e89ede2dc..b9d5705a1 100644 --- a/sdmx/__init__.py +++ b/sdmx/__init__.py @@ -3,7 +3,7 @@ from sdmx.api import Request, read_sdmx, read_url from sdmx.source import add_source, list_sources from sdmx.util import Resource -from sdmx.writer import to_pandas +from sdmx.writer 
import to_pandas, to_xml import logging __all__ = [ @@ -15,6 +15,7 @@ 'read_sdmx', 'read_url', 'to_pandas', + 'to_xml' ] diff --git a/sdmx/tests/writer/test_writer_xml.py b/sdmx/tests/writer/test_writer_xml.py new file mode 100644 index 000000000..0a382105b --- /dev/null +++ b/sdmx/tests/writer/test_writer_xml.py @@ -0,0 +1,34 @@ +import pytest +import sdmx +from sdmx.message import StructureMessage +from sdmx.model import Agency, Code, Codelist + + +def test_codelist(): + ECB = Agency(id='ECB') + cl = Codelist( + id='CL_COLLECTION', + version='1.0', + is_final=False, + is_external_reference=False, + maintainer=ECB, + name={'en': 'Collection indicator code list'}, + ) + cl.items['A'] = Code( + id='A', + name={'en': "Average of observations through period"}, + ) + cl.items['B'] = Code( + id='B', + name={'en': 'Beginning of period'}, + ) + + sdmx.to_xml(cl) + + +def test_not_implemented(): + msg = StructureMessage() + + with pytest.raises(NotImplementedError, + match='write StructureMessage to XML'): + sdmx.to_xml(msg) diff --git a/sdmx/writer/__init__.py b/sdmx/writer/__init__.py index 7ca93b93e..72c1e449f 100644 --- a/sdmx/writer/__init__.py +++ b/sdmx/writer/__init__.py @@ -1,3 +1,7 @@ from .pandas import write as to_pandas +from .xml import write as to_xml -__all__ = ['to_pandas'] +__all__ = [ + 'to_pandas', + 'to_xml', +] diff --git a/sdmx/writer/xml.py b/sdmx/writer/xml.py new file mode 100644 index 000000000..f7f0057b8 --- /dev/null +++ b/sdmx/writer/xml.py @@ -0,0 +1,63 @@ +from lxml import etree +from lxml.builder import ElementMaker + +from sdmx.format.xml import NS, qname +from sdmx.model import Codelist, ItemScheme +import sdmx.urn + + +_ALIAS = { + Codelist: ItemScheme, +} + +E = ElementMaker(nsmap=NS) + + +def write(obj, *args, **kwargs): + """Convert an SDMX *obj* to XML. + + Implements a dispatch pattern according to the type of *obj*. For instance, + a :class:`.DataSet` object is converted using :func:`.write_dataset`. 
See + individual ``write_*`` methods named for more information on their + behaviour, including accepted *args* and *kwargs*. + """ + return etree.tostring(_write(obj, *args, **kwargs), pretty_print=True) + + +def _write(obj, *args, **kwargs): + """Helper for :meth:`write`; returns :class:`lxml.Element` object(s).""" + cls = obj.__class__ + func_name = 'write_' + _ALIAS.get(cls, cls).__name__.lower() + try: + func = globals()[func_name] + except KeyError: + raise NotImplementedError(f'write {obj.__class__.__name__} to XML') + else: + return func(obj, *args, **kwargs) + + +def write_nameableartefact(obj, elem): + for locale, label in obj.name.localizations.items(): + child = E(qname('com', 'Name'), label) + child.set(qname('xml', 'lang'), locale) + elem.append(child) + + +def write_maintainableartefact(obj): + urn = sdmx.urn.make(obj) + elem = E(qname('str', obj.__class__.__name__), urn=urn) + write_nameableartefact(obj, elem) + return elem + + +def write_itemscheme(obj): + elem = write_maintainableartefact(obj) + elem.extend(write_item(i, parent_elem=elem) for i in obj.items.values()) + return elem + + +def write_item(obj, parent_elem): + # NB this isn't correct: produces .Codelist instead of .Code + elem = E(qname('str', 'Code'), urn=f"{parent_elem.attrib['urn']}.{obj.id}") + write_nameableartefact(obj, elem) + return elem From cfcef1708ca7ce0d328383a9fc98db4996f01aba Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 21:52:50 +0200 Subject: [PATCH 10/18] Move writer data to a fixture --- sdmx/tests/writer/conftest.py | 27 +++++++++++++++++++++++++++ sdmx/tests/writer/test_writer_xml.py | 23 ++--------------------- 2 files changed, 29 insertions(+), 21 deletions(-) create mode 100644 sdmx/tests/writer/conftest.py diff --git a/sdmx/tests/writer/conftest.py b/sdmx/tests/writer/conftest.py new file mode 100644 index 000000000..9ab28e472 --- /dev/null +++ b/sdmx/tests/writer/conftest.py @@ -0,0 +1,27 @@ +import pytest +from sdmx.model import 
Agency, Code, Codelist + + +@pytest.fixture +def codelist(): + ECB = Agency(id='ECB') + + cl = Codelist( + id='CL_COLLECTION', + version='1.0', + is_final=False, + is_external_reference=False, + maintainer=ECB, + name={'en': 'Collection indicator code list'}, + ) + + cl.items['A'] = Code( + id='A', + name={'en': "Average of observations through period"}, + ) + cl.items['B'] = Code( + id='B', + name={'en': 'Beginning of period'}, + ) + + return cl diff --git a/sdmx/tests/writer/test_writer_xml.py b/sdmx/tests/writer/test_writer_xml.py index 0a382105b..6cffcbb9e 100644 --- a/sdmx/tests/writer/test_writer_xml.py +++ b/sdmx/tests/writer/test_writer_xml.py @@ -1,29 +1,10 @@ import pytest import sdmx from sdmx.message import StructureMessage -from sdmx.model import Agency, Code, Codelist -def test_codelist(): - ECB = Agency(id='ECB') - cl = Codelist( - id='CL_COLLECTION', - version='1.0', - is_final=False, - is_external_reference=False, - maintainer=ECB, - name={'en': 'Collection indicator code list'}, - ) - cl.items['A'] = Code( - id='A', - name={'en': "Average of observations through period"}, - ) - cl.items['B'] = Code( - id='B', - name={'en': 'Beginning of period'}, - ) - - sdmx.to_xml(cl) +def test_codelist(codelist): + sdmx.to_xml(codelist) def test_not_implemented(): From edeadcd8584e32cedefcb70e4647c7af39950bfd Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Mon, 4 May 2020 22:48:02 +0200 Subject: [PATCH 11/18] Add sdmx.writer.protobuf and test --- sdmx/tests/writer/test_protobuf.py | 21 +++++++++++ sdmx/writer/protobuf.py | 59 ++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 sdmx/tests/writer/test_protobuf.py create mode 100644 sdmx/writer/protobuf.py diff --git a/sdmx/tests/writer/test_protobuf.py b/sdmx/tests/writer/test_protobuf.py new file mode 100644 index 000000000..f2bddee4c --- /dev/null +++ b/sdmx/tests/writer/test_protobuf.py @@ -0,0 +1,21 @@ +import logging + +import pytest +from sdmx.message import 
StructureMessage +from sdmx.writer.protobuf import write as to_protobuf + + +@pytest.mark.xfail(raises=RuntimeError, + match='sdmx.format.protobuf_pb2 missing') +def test_codelist(caplog, codelist): + msg = StructureMessage() + msg.codelist[codelist.id] = codelist + + caplog.set_level(logging.ERROR) + + result = to_protobuf(msg) + + print(result) + + # No errors logged + assert len(caplog.messages) == 0 diff --git a/sdmx/writer/protobuf.py b/sdmx/writer/protobuf.py new file mode 100644 index 000000000..b99a16168 --- /dev/null +++ b/sdmx/writer/protobuf.py @@ -0,0 +1,59 @@ +import logging + +try: + import sdmx.format.protobuf_pb2 as pb +except ImportError: + pb = None + + +log = logging.getLogger(__name__) + + +def write(obj, *args, **kwargs): + """Convert an SDMX *obj* to protobuf string.""" + if not pb: + raise RuntimeError('sdmx.format.protobuf_pb2 missing') + + return _write(obj, *args, **kwargs).SerializeToString() + + +def _write(obj, *args, **kwargs): + """Helper for :meth:`write`; returns :mod:`protobuf` object(s).""" + cls_name = obj.__class__.__name__ + func_name = f'write_{cls_name.lower()}' + try: + func = globals()[func_name] + except KeyError: + raise NotImplementedError(f'write {cls_name} to protobuf') + else: + return func(obj, *args, **kwargs) + + +def _copy(obj, pb_obj): + """Update the attributes of *pb_obj* from the sdmx.message/.model *obj*.""" + dir_logged = False + + for attr, value in obj.__dict__.items(): + if not value: + continue + + try: + setattr(pb_obj, attr, value) + log.info(f'Set {attr}') + except Exception as exc: + log.error(f'Failed to set {attr}: {exc}') + + if not dir_logged: + fields = filter(lambda n: not n.startswith('_'), dir(pb_obj)) + log.info(sorted(fields)) + dir_logged = True + + +def write_structuremessage(obj, *args, **kwargs): + envelope = pb.Envelope() + + for cl in obj.codelist.values(): + pb_obj = envelope.data.codelists.add() + _copy(cl, pb_obj) + + return envelope From 3552f8c02573a7ada35b7501b35de597c1ed30e3 
Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 5 May 2020 12:04:04 +0200 Subject: [PATCH 12/18] Add sdmx.writer.base.BaseWriter --- sdmx/tests/writer/conftest.py | 11 ++++ sdmx/tests/writer/test_writer_xml.py | 14 +++-- sdmx/writer/base.py | 65 +++++++++++++++++++++++ sdmx/writer/xml.py | 77 ++++++++++++++++------------ 4 files changed, 129 insertions(+), 38 deletions(-) create mode 100644 sdmx/writer/base.py diff --git a/sdmx/tests/writer/conftest.py b/sdmx/tests/writer/conftest.py index 9ab28e472..84ddee643 100644 --- a/sdmx/tests/writer/conftest.py +++ b/sdmx/tests/writer/conftest.py @@ -1,9 +1,11 @@ import pytest +from sdmx.message import StructureMessage from sdmx.model import Agency, Code, Codelist @pytest.fixture def codelist(): + """A Codelist for writer testing.""" ECB = Agency(id='ECB') cl = Codelist( @@ -25,3 +27,12 @@ def codelist(): ) return cl + + +@pytest.fixture +def structuremessage(codelist): + """A StructureMessage for writer testing.""" + sm = StructureMessage() + sm.codelist[codelist.id] = codelist + + return sm diff --git a/sdmx/tests/writer/test_writer_xml.py b/sdmx/tests/writer/test_writer_xml.py index 6cffcbb9e..baf827d13 100644 --- a/sdmx/tests/writer/test_writer_xml.py +++ b/sdmx/tests/writer/test_writer_xml.py @@ -1,15 +1,21 @@ import pytest import sdmx -from sdmx.message import StructureMessage +from sdmx.message import DataMessage def test_codelist(codelist): - sdmx.to_xml(codelist) + result = sdmx.to_xml(codelist) + print(result.decode()) + + +def test_structuremessage(structuremessage): + result = sdmx.to_xml(structuremessage) + print(result.decode()) def test_not_implemented(): - msg = StructureMessage() + msg = DataMessage() with pytest.raises(NotImplementedError, - match='write StructureMessage to XML'): + match='write DataMessage to XML'): sdmx.to_xml(msg) diff --git a/sdmx/writer/base.py b/sdmx/writer/base.py new file mode 100644 index 000000000..42e4685be --- /dev/null +++ b/sdmx/writer/base.py @@ -0,0 +1,65 @@ 
+from functools import singledispatch + + +class BaseWriter: + """Base class for recursive writers. + + Usage: + + - Create an instance of this class. + - Use :meth:`register` in the same manner as Python's built-in + :func:`functools.singledispatch` to decorate functions that certain types + of :mod:`sdmx.model` or :mod:`sdmx.message` objects. + - Call :meth:`recurse` to kick off recursive writing of objects, including + from inside other functions. + + Example + ------- + MyWriter = BaseWriter('my') + + @MyWriter.register + def _(obj: sdmx.model.ItemScheme): + ... code to write an ItemScheme ... + return result + + @MyWriter.register + def _(obj: sdmx.model.Codelist): + ... code to write a Codelist ... + return result + """ + def __init__(self, format_name): + # Create the single-dispatch function + @singledispatch + def func(obj, *args, **kwargs): + raise NotImplementedError(f'write {obj.__class__.__name__} to ' + f'{format_name}') + + self._dispatcher = func + + def recurse(self, obj, *args, **kwargs): + """Recursively write *obj*. + + If there is no :meth:`register` 'ed function to write the class of + `obj`, then the parent class of `obj` is used to find a method. 
+ """ + # TODO use a cache to speed up the MRO does not need to be traversed + # for every object instance + + dispatcher = getattr(self, '_dispatcher') + try: + # Let the single dispatch function choose the overload + return dispatcher(obj, *args, **kwargs) + except NotImplementedError as exc: + try: + # Use the object's parent class to get a different overload + func = dispatcher.registry[obj.__class__.mro()[1]] + except KeyError: + # Overload for the parent class did not exist + raise exc + + return func(obj, *args, **kwargs) + + def register(self, func): + """Register *func* as a writer for a particular object type.""" + dispatcher = getattr(self, '_dispatcher') + dispatcher.register(func) diff --git a/sdmx/writer/xml.py b/sdmx/writer/xml.py index f7f0057b8..3bbdb3b5c 100644 --- a/sdmx/writer/xml.py +++ b/sdmx/writer/xml.py @@ -1,63 +1,72 @@ from lxml import etree from lxml.builder import ElementMaker +from sdmx import message, model from sdmx.format.xml import NS, qname -from sdmx.model import Codelist, ItemScheme import sdmx.urn +from sdmx.writer.base import BaseWriter -_ALIAS = { - Codelist: ItemScheme, -} +_element_maker = ElementMaker(nsmap=NS) -E = ElementMaker(nsmap=NS) +def Element(name, *args, **kwargs): + return _element_maker(qname(*name.split(':')), *args, **kwargs) -def write(obj, *args, **kwargs): - """Convert an SDMX *obj* to XML. - Implements a dispatch pattern according to the type of *obj*. For instance, - a :class:`.DataSet` object is converted using :func:`.write_dataset`. See - individual ``write_*`` methods named for more information on their - behaviour, including accepted *args* and *kwargs*. 
- """ - return etree.tostring(_write(obj, *args, **kwargs), pretty_print=True) +Writer = BaseWriter('XML') + +def write(obj, *args, **kwargs): + pp = kwargs.pop('pretty_print', True) + tree = Writer.recurse(obj, *args, **kwargs) + return etree.tostring( + tree, + pretty_print=pp, + ) -def _write(obj, *args, **kwargs): - """Helper for :meth:`write`; returns :class:`lxml.Element` object(s).""" - cls = obj.__class__ - func_name = 'write_' + _ALIAS.get(cls, cls).__name__.lower() - try: - func = globals()[func_name] - except KeyError: - raise NotImplementedError(f'write {obj.__class__.__name__} to XML') - else: - return func(obj, *args, **kwargs) +# Utility functions -def write_nameableartefact(obj, elem): +def nameable(obj, elem): for locale, label in obj.name.localizations.items(): - child = E(qname('com', 'Name'), label) + child = Element('com:Name', label) child.set(qname('xml', 'lang'), locale) elem.append(child) -def write_maintainableartefact(obj): +def maintainable(obj): urn = sdmx.urn.make(obj) - elem = E(qname('str', obj.__class__.__name__), urn=urn) - write_nameableartefact(obj, elem) + elem = Element(f'str:{obj.__class__.__name__}', urn=urn) + nameable(obj, elem) return elem -def write_itemscheme(obj): - elem = write_maintainableartefact(obj) - elem.extend(write_item(i, parent_elem=elem) for i in obj.items.values()) +@Writer.register +def _(obj: message.StructureMessage): + msg = Element('mes:StructureMessage') + structures = Element('mes:Structures') + msg.append(structures) + + codelists = Element('mes:Codelists') + structures.append(codelists) + codelists.extend(Writer.recurse(cl) for cl in obj.codelist.values()) + + return msg + + +@Writer.register +def _(obj: model.ItemScheme): + elem = maintainable(obj) + elem.extend(Writer.recurse(i, parent_elem=elem) + for i in obj.items.values()) return elem -def write_item(obj, parent_elem): +@Writer.register +def _(obj: model.Item, parent_elem): # NB this isn't correct: produces .Codelist instead of .Code - elem = 
E(qname('str', 'Code'), urn=f"{parent_elem.attrib['urn']}.{obj.id}") - write_nameableartefact(obj, elem) + elem = Element(f'str:{obj.__class__.__name__}', + urn=f"{parent_elem.attrib['urn']}.{obj.id}") + nameable(obj, elem) return elem From 7d4961ce179951baa144f3298ab4e13632add007 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 5 May 2020 12:28:46 +0200 Subject: [PATCH 13/18] Simplify writer.pandas using writer.base --- sdmx/writer/base.py | 1 + sdmx/writer/pandas.py | 78 +++++++++++++++++++------------------------ 2 files changed, 35 insertions(+), 44 deletions(-) diff --git a/sdmx/writer/base.py b/sdmx/writer/base.py index 42e4685be..61c0534db 100644 --- a/sdmx/writer/base.py +++ b/sdmx/writer/base.py @@ -63,3 +63,4 @@ def register(self, func): """Register *func* as a writer for a particular object type.""" dispatcher = getattr(self, '_dispatcher') dispatcher.register(func) + return func diff --git a/sdmx/writer/pandas.py b/sdmx/writer/pandas.py index dcc3596fa..f08a2d797 100644 --- a/sdmx/writer/pandas.py +++ b/sdmx/writer/pandas.py @@ -3,28 +3,19 @@ import numpy as np import pandas as pd -from sdmx import model +from sdmx import message, model from sdmx.model import ( DEFAULT_LOCALE, - AgencyScheme, AllDimensions, DataAttribute, - DataflowDefinition, - DataStructureDefinition, DataSet, Dimension, DimensionComponent, - # DimensionDescriptor, - CategoryScheme, - Codelist, - Component, - ConceptScheme, - ItemScheme, - NameableArtefact, Observation, SeriesKey, TimeDimension, ) +from sdmx.writer.base import BaseWriter from sdmx.util import DictLike @@ -33,22 +24,7 @@ DEFAULT_RTYPE = 'rows' -# Class → common write_*() methods -_ALIAS = { - DictLike: dict, - AgencyScheme: ItemScheme, - CategoryScheme: ItemScheme, - ConceptScheme: ItemScheme, - Codelist: ItemScheme, - DataflowDefinition: NameableArtefact, - DataStructureDefinition: NameableArtefact, - Dimension: Component, - TimeDimension: Component, - model.GenericDataSet: DataSet, - 
model.GenericTimeSeriesDataSet: DataSet, - model.StructureSpecificDataSet: DataSet, - model.StructureSpecificTimeSeriesDataSet: DataSet, -} +Writer = BaseWriter('pandas') def write(obj, *args, **kwargs): @@ -59,13 +35,15 @@ def write(obj, *args, **kwargs): individual ``write_*`` methods named for more information on their behaviour, including accepted *args* and *kwargs*. """ - cls = obj.__class__ - function = 'write_' + _ALIAS.get(cls, cls).__name__.lower() - return globals()[function](obj, *args, **kwargs) + return Writer.recurse(obj, *args, **kwargs) + # cls = obj.__class__ + # function = 'write_' + _ALIAS.get(cls, cls).__name__.lower() + # return globals()[function](obj, *args, **kwargs) # Functions for Python containers -def write_list(obj, *args, **kwargs): +@Writer.register +def _(obj: list, *args, **kwargs): """Convert a :class:`list` of SDMX objects. For the following *obj*, :meth:`write_list` returns :class:`pandas.Series` @@ -88,7 +66,8 @@ def write_list(obj, *args, **kwargs): return [write(item, *args, **kwargs) for item in obj] -def write_dict(obj, *args, **kwargs): +@Writer.register +def _(obj: dict, *args, **kwargs): """Convert mappings. The values of the mapping are write()'d individually. If the resulting @@ -120,14 +99,16 @@ def write_dict(obj, *args, **kwargs): raise ValueError(result_type) -def write_set(obj, *args, **kwargs): +@Writer.register +def _(obj: set, *args, **kwargs): """Convert :class:`set`.""" result = {write(o, *args, **kwargs) for o in obj} return result # Functions for message classes -def write_datamessage(obj, *args, rtype=None, **kwargs): +@Writer.register +def _(obj: message.DataMessage, *args, rtype=None, **kwargs): """Convert :class:`.DataMessage`. The data set(s) within the message are converted to pandas objects. 
@@ -162,7 +143,8 @@ def write_datamessage(obj, *args, rtype=None, **kwargs): return [write(ds, *args, **kwargs) for ds in obj.data] -def write_structuremessage(obj, include=None, **kwargs): +@Writer.register +def _(obj: message.StructureMessage, include=None, **kwargs): """Convert :class:`.StructureMessage`. Parameters @@ -210,7 +192,8 @@ def write_structuremessage(obj, include=None, **kwargs): # Functions for model classes -def write_component(obj): +@Writer.register +def _(obj: model.Component): """Convert :class:`.Component`. The :attr:`~.Concept.id` attribute of the @@ -219,7 +202,8 @@ def write_component(obj): return str(obj.concept_identity.id) -def write_contentconstraint(obj, **kwargs): +@Writer.register +def _(obj: model.ContentConstraint, **kwargs): """Convert :class:`.ContentConstraint`.""" if len(obj.data_content_region) != 1: raise NotImplementedError @@ -227,7 +211,8 @@ def write_contentconstraint(obj, **kwargs): return write(obj.data_content_region[0], **kwargs) -def write_cuberegion(obj, **kwargs): +@Writer.register +def _(obj: model.CubeRegion, **kwargs): """Convert :class:`.CubeRegion`.""" result = DictLike() for dim, memberselection in obj.member.items(): @@ -236,8 +221,9 @@ def write_cuberegion(obj, **kwargs): return result -def write_dataset(obj, attributes='', dtype=np.float64, constraint=None, - datetime=False, **kwargs): +@Writer.register +def write_dataset(obj: model.DataSet, attributes='', dtype=np.float64, + constraint=None, datetime=False, **kwargs): """Convert :class:`~.DataSet`. See the :ref:`walkthrough ` for examples of using the `datetime` @@ -501,7 +487,8 @@ def _get_attrs(): return df -def write_dimensiondescriptor(obj): +@Writer.register +def _(obj: model.DimensionDescriptor): """Convert :class:`.DimensionDescriptor`. 
The :attr:`~.DimensionDescriptor.components` of the DimensionDescriptor @@ -510,7 +497,8 @@ def write_dimensiondescriptor(obj): return write(obj.components) -def write_itemscheme(obj, locale=DEFAULT_LOCALE): +@Writer.register +def _(obj: model.ItemScheme, locale=DEFAULT_LOCALE): """Convert :class:`.ItemScheme`. Names from *locale* are serialized. @@ -558,12 +546,14 @@ def add_item(item): return result -def write_membervalue(obj): +@Writer.register +def _(obj: model.MemberValue): """Convert :class:`.MemberValue`.""" return obj.value -def write_nameableartefact(obj): +@Writer.register +def _(obj: model.NameableArtefact): """Convert :class:`.NameableArtefact`. The :attr:`~.NameableArtefact.name` attribute of *obj* is returned. From debf5c7048d4e58c12cd6d998c35e201e91749e5 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 5 May 2020 12:29:04 +0200 Subject: [PATCH 14/18] Also call sdmx.to_pandas() on data source tests --- sdmx/tests/test_sources.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index d35b853c2..b54d3f90f 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -7,10 +7,11 @@ import logging import os +import sdmx +from sdmx import Resource from sdmx.api import Request from sdmx.exceptions import HTTPError from sdmx.source import DataContentType, sources -from sdmx.util import Resource import pytest import requests_mock @@ -129,6 +130,8 @@ def test_endpoints(self, req, endpoint, args): # print(cache, cache.read_text(), result, sep='\n\n') # assert False + sdmx.to_pandas(result) + del result From 44bcfa69c7d20eaa46b760a3dddd0bfd2744cd3f Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 5 May 2020 12:49:47 +0200 Subject: [PATCH 15/18] Move coverage configuration to setup.cfg --- .coveragerc | 3 --- setup.cfg | 6 ++++++ 2 files changed, 6 insertions(+), 3 deletions(-) delete mode 100644 .coveragerc diff --git a/.coveragerc 
b/.coveragerc deleted file mode 100644 index 07e304d14..000000000 --- a/.coveragerc +++ /dev/null @@ -1,3 +0,0 @@ -[run] -omit = - sdmx/experimental.py diff --git a/setup.cfg b/setup.cfg index f80bfe46b..c5c2d7d5c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -61,3 +61,9 @@ addopts = sdmx -m "not experimental" markers = experimental: experimental features + +[coverage:run] +omit = + sdmx/experimental.py + sdmx/tests/writer/test_protobuf.py + sdmx/writer/protobuf.py From 79dfeef33da2c2a15acb63e738ef061b43fc6678 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 5 May 2020 13:40:21 +0200 Subject: [PATCH 16/18] Edit documentation for writer.xml, writer.pandas --- doc/api.rst | 88 ++++++++++++++++++++-------- doc/whatsnew.rst | 6 +- sdmx/__init__.py | 2 +- sdmx/tests/writer/test_writer_xml.py | 4 +- sdmx/writer/pandas.py | 61 ++++--------------- sdmx/writer/xml.py | 23 +++++--- 6 files changed, 99 insertions(+), 85 deletions(-) diff --git a/doc/api.rst b/doc/api.rst index 2237f8ab1..d609181d7 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -68,41 +68,77 @@ SDMX-JSON :undoc-members: -``writer``: Convert SDMX to pandas objects ------------------------------------------- +``writer``: Convert ``sdmx`` objects to other formats +----------------------------------------------------- + +.. _writer-pandas: + +``writer.pandas``: Convert to ``pandas`` objects +:::::::::::::::::::::::::::::::::::::::::::::::: + +.. currentmodule:: sdmx.writer.pandas + .. versionchanged:: 1.0 - :meth:`sdmx.to_pandas` (via :meth:`write `) - handles all types of objects, replacing the earlier, separate - ``data2pandas`` and ``structure2pd`` writers. + :meth:`sdmx.to_pandas` handles all types of objects, replacing the earlier, separate ``data2pandas`` and ``structure2pd`` writers. -.. automodule:: sdmx.writer - :members: - :exclude-members: write - - .. automethod:: sdmx.writer.write - - .. 
autosummary:: - write_component - write_datamessage - write_dataset - write_dict - write_dimensiondescriptor - write_itemscheme - write_list - write_membervalue - write_nameableartefact - write_serieskeys - write_structuremessage - -.. autodata:: DEFAULT_RTYPE - :noindex: +:func:`.to_pandas` implements a dispatch pattern according to the type of *obj*. +Some of the internal methods take specific arguments and return varying values. +These arguments can be passed to :meth:`to_pandas` when `obj` is of the appropriate type: + +.. autosummary:: + sdmx.writer.pandas.write_dataset + sdmx.writer.pandas.write_datamessage + sdmx.writer.pandas.write_itemscheme + sdmx.writer.pandas.write_structuremessage + sdmx.writer.pandas.DEFAULT_RTYPE + +Other objects are converted as follows: + +:class:`.Component` + The :attr:`~.Concept.id` attribute of the :attr:`~.Component.concept_identity` is returned. + +:class:`.DataMessage` + The :class:`.DataSet` or data sets within the Message are converted to pandas objects. + Returns: + + - :class:`pandas.Series` or :class:`pandas.DataFrame`, if `obj` has only one data set. + - list of (Series or DataFrame), if `obj` has more than one data set. + +:class:`.dict` + The values of the mapping are converted individually. + If the resulting values are :class:`str` or Series *with indexes that share the same name*, then they are converted to a Series, possibly with a :class:`pandas.MultiIndex`. + Otherwise, a :class:`.DictLike` is returned. + +:class:`.DimensionDescriptor` + The :attr:`~.DimensionDescriptor.components` of the DimensionDescriptor are written. + +:class:`list` + For the following *obj*, returns Series instead of a :class:`list`: + + - a list of :class:`.Observation`: the Observations are written using :meth:`write_dataset`. + - a list with only 1 :class:`.DataSet` (e.g. the :attr:`~.DataMessage.data` attribute of :class:`.DataMessage`): the Series for the single element is returned. 
+ - a list of :class:`.SeriesKey`: the key values (but no data) are returned. + +:class:`.NameableArtefact` + The :attr:`~.NameableArtefact.name` attribute of `obj` is returned. + +.. automodule:: sdmx.writer.pandas + :members: DEFAULT_RTYPE, write_dataset, write_datamessage, write_itemscheme, write_structuremessage .. todo:: Support selection of language for conversion of :class:`InternationalString `. +``writer.xml``: Write to SDMX-ML +:::::::::::::::::::::::::::::::: + +.. versionadded:: 1.1 + +See :func:`.to_xml`. + + ``remote``: Access SDMX REST web services ----------------------------------------- .. autoclass:: sdmx.remote.Session diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst index f94f50f36..7e2194ac3 100644 --- a/doc/whatsnew.rst +++ b/doc/whatsnew.rst @@ -6,7 +6,11 @@ What's new? Next release (vX.Y.0) ===================== -- Test suite improvements +- New features: + + - :pull:`3`: Add :meth:`to_xml` to generate SDMX-ML for a subset of the IM. + +- Test suite: - :pull:`2`: Add tests of data queries for source(s): OECD diff --git a/sdmx/__init__.py b/sdmx/__init__.py index b9d5705a1..b2b974bf5 100644 --- a/sdmx/__init__.py +++ b/sdmx/__init__.py @@ -15,7 +15,7 @@ 'read_sdmx', 'read_url', 'to_pandas', - 'to_xml' + 'to_xml', ] diff --git a/sdmx/tests/writer/test_writer_xml.py b/sdmx/tests/writer/test_writer_xml.py index baf827d13..7cf6882ac 100644 --- a/sdmx/tests/writer/test_writer_xml.py +++ b/sdmx/tests/writer/test_writer_xml.py @@ -4,12 +4,12 @@ def test_codelist(codelist): - result = sdmx.to_xml(codelist) + result = sdmx.to_xml(codelist, pretty_print=True) print(result.decode()) def test_structuremessage(structuremessage): - result = sdmx.to_xml(structuremessage) + result = sdmx.to_xml(structuremessage, pretty_print=True) print(result.decode()) diff --git a/sdmx/writer/pandas.py b/sdmx/writer/pandas.py index f08a2d797..a98d946ed 100644 --- a/sdmx/writer/pandas.py +++ b/sdmx/writer/pandas.py @@ -30,32 +30,15 @@ def write(obj, *args, **kwargs): 
"""Convert an SDMX *obj* to :mod:`pandas` object(s). - Implements a dispatch pattern according to the type of *obj*. For instance, - a :class:`.DataSet` object is converted using :func:`.write_dataset`. See - individual ``write_*`` methods named for more information on their - behaviour, including accepted *args* and *kwargs*. + See :ref:`sdmx.writer.pandas `. """ return Writer.recurse(obj, *args, **kwargs) - # cls = obj.__class__ - # function = 'write_' + _ALIAS.get(cls, cls).__name__.lower() - # return globals()[function](obj, *args, **kwargs) # Functions for Python containers @Writer.register def _(obj: list, *args, **kwargs): - """Convert a :class:`list` of SDMX objects. - - For the following *obj*, :meth:`write_list` returns :class:`pandas.Series` - instead of a :class:`list`: - - - a list of :class:`.Observation`: the Observations are written using - :meth:`write_dataset`. - - a list with only 1 :class:`.DataSet` (e.g. the - :attr:`~.DataMessage.data>` attribute of :class:`.DataMessage`): the - Series for the single element is returned. - - a list of :class:`.SeriesKey`: the key values (but no data) are returned. - """ + """Convert a :class:`list` of SDMX objects.""" if isinstance(obj[0], Observation): return write_dataset(obj, *args, **kwargs) elif isinstance(obj[0], DataSet) and len(obj) == 1: @@ -68,13 +51,7 @@ def _(obj: list, *args, **kwargs): @Writer.register def _(obj: dict, *args, **kwargs): - """Convert mappings. - - The values of the mapping are write()'d individually. If the resulting - values are :class:`str` or :class:`pd.Series` *with indexes that share the - same name*, then they are converted to a pd.Series, possibly with a - pd.MultiIndex. Otherwise, a DictLike is returned. 
- """ + """Convert mappings.""" result = {k: write(v, *args, **kwargs) for k, v in obj.items()} result_type = set(type(v) for v in result.values()) @@ -108,11 +85,9 @@ def _(obj: set, *args, **kwargs): # Functions for message classes @Writer.register -def _(obj: message.DataMessage, *args, rtype=None, **kwargs): +def write_datamessage(obj: message.DataMessage, *args, rtype=None, **kwargs): """Convert :class:`.DataMessage`. - The data set(s) within the message are converted to pandas objects. - Parameters ---------- rtype : 'compat' or 'rows', optional @@ -144,7 +119,8 @@ def _(obj: message.DataMessage, *args, rtype=None, **kwargs): @Writer.register -def _(obj: message.StructureMessage, include=None, **kwargs): +def write_structuremessage(obj: message.StructureMessage, include=None, + **kwargs): """Convert :class:`.StructureMessage`. Parameters @@ -194,11 +170,7 @@ def _(obj: message.StructureMessage, include=None, **kwargs): @Writer.register def _(obj: model.Component): - """Convert :class:`.Component`. - - The :attr:`~.Concept.id` attribute of the - :attr:`~.Component.concept_identity` is returned. - """ + """Convert :class:`.Component`.""" return str(obj.concept_identity.id) @@ -489,19 +461,18 @@ def _get_attrs(): @Writer.register def _(obj: model.DimensionDescriptor): - """Convert :class:`.DimensionDescriptor`. - - The :attr:`~.DimensionDescriptor.components` of the DimensionDescriptor - are written. - """ + """Convert :class:`.DimensionDescriptor`.""" return write(obj.components) @Writer.register -def _(obj: model.ItemScheme, locale=DEFAULT_LOCALE): +def write_itemscheme(obj: model.ItemScheme, locale=DEFAULT_LOCALE): """Convert :class:`.ItemScheme`. - Names from *locale* are serialized. + Parameters + ---------- + locale : str, optional + Locale for names to return. 
Returns ------- @@ -548,21 +519,15 @@ def add_item(item): @Writer.register def _(obj: model.MemberValue): - """Convert :class:`.MemberValue`.""" return obj.value @Writer.register def _(obj: model.NameableArtefact): - """Convert :class:`.NameableArtefact`. - - The :attr:`~.NameableArtefact.name` attribute of *obj* is returned. - """ return str(obj.name) def write_serieskeys(obj): - """Convert a list of :class:`.SeriesKey`.""" result = [] for sk in obj: result.append({dim: kv.value for dim, kv in sk.order().values.items()}) diff --git a/sdmx/writer/xml.py b/sdmx/writer/xml.py index 3bbdb3b5c..3a53109bb 100644 --- a/sdmx/writer/xml.py +++ b/sdmx/writer/xml.py @@ -17,13 +17,22 @@ def Element(name, *args, **kwargs): Writer = BaseWriter('XML') -def write(obj, *args, **kwargs): - pp = kwargs.pop('pretty_print', True) - tree = Writer.recurse(obj, *args, **kwargs) - return etree.tostring( - tree, - pretty_print=pp, - ) +def write(obj, **kwargs): + """Convert an SDMX *obj* to SDMX-ML. + + Parameters + ---------- + kwargs + Passed to :meth:`lxml.etree.tostring`, e.g. `pretty_print` = + :obj:`True`. + + Raises + ------ + NotImplementedError + If writing specific objects to SDMX-ML has not been implemented in + :mod:`sdmx`.
+ """ + return etree.tostring(Writer.recurse(obj), **kwargs) # Utility functions From cb4e3542d8b18d64d826e89527111a092a6a7e03 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 5 May 2020 14:35:36 +0200 Subject: [PATCH 17/18] Test round-trip of SDMX-ML --- sdmx/tests/writer/test_writer_xml.py | 15 +++++++++++++-- sdmx/urn.py | 27 +++++++++++++++++++++++---- sdmx/writer/xml.py | 18 ++++++++++++------ 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/sdmx/tests/writer/test_writer_xml.py b/sdmx/tests/writer/test_writer_xml.py index 7cf6882ac..3f071eb65 100644 --- a/sdmx/tests/writer/test_writer_xml.py +++ b/sdmx/tests/writer/test_writer_xml.py @@ -3,15 +3,26 @@ from sdmx.message import DataMessage -def test_codelist(codelist): +def test_codelist(tmp_path, codelist): result = sdmx.to_xml(codelist, pretty_print=True) print(result.decode()) -def test_structuremessage(structuremessage): +def test_structuremessage(tmp_path, structuremessage): result = sdmx.to_xml(structuremessage, pretty_print=True) print(result.decode()) + # Message can be round-tripped to/from file + path = tmp_path / 'output.xml' + path.write_bytes(result) + msg = sdmx.read_sdmx(path) + + # Contents match the original object + assert ( + msg.codelist['CL_COLLECTION']['A'].name['en'] + == structuremessage.codelist['CL_COLLECTION']['A'].name['en'] + ) + def test_not_implemented(): msg = DataMessage() diff --git a/sdmx/urn.py b/sdmx/urn.py index a6f7f2965..c71351943 100644 --- a/sdmx/urn.py +++ b/sdmx/urn.py @@ -1,6 +1,6 @@ import re -from sdmx.model import PACKAGE +from sdmx.model import PACKAGE, MaintainableArtefact # Regular expression for URNs @@ -12,12 +12,31 @@ _BASE = ( 'urn:sdmx:org.sdmx.infomodel.{package}.{obj.__class__.__name__}=' - '{obj.maintainer.id}:{obj.id}({obj.version})' + '{ma.maintainer.id}:{ma.id}({ma.version}){extra_id}' ) -def make(obj): - return _BASE.format(obj=obj, package=PACKAGE[obj.__class__]) +def make(obj, maintainable_parent=None): + """Create an 
SDMX URN for `obj`. + + If `obj` is not :class:`.MaintainableArtefact`, then `maintainable_parent` + must be supplied in order to construct the URN. + """ + if maintainable_parent: + ma = maintainable_parent + extra_id = f'.{obj.id}' + else: + ma = obj + extra_id = '' + + assert isinstance(ma, MaintainableArtefact) + + return _BASE.format( + package=PACKAGE[obj.__class__], + obj=obj, + ma=ma, + extra_id=extra_id, + ) def match(string): diff --git a/sdmx/writer/xml.py b/sdmx/writer/xml.py index 3a53109bb..9e9f791fd 100644 --- a/sdmx/writer/xml.py +++ b/sdmx/writer/xml.py @@ -53,7 +53,11 @@ def maintainable(obj): @Writer.register def _(obj: message.StructureMessage): - msg = Element('mes:StructureMessage') + msg = Element('mes:Structure') + + # Empty header element + msg.append(Element('mes:Header')) + structures = Element('mes:Structures') msg.append(structures) @@ -67,15 +71,17 @@ def _(obj: message.StructureMessage): @Writer.register def _(obj: model.ItemScheme): elem = maintainable(obj) - elem.extend(Writer.recurse(i, parent_elem=elem) - for i in obj.items.values()) + elem.extend(Writer.recurse(i, parent=obj) for i in obj.items.values()) return elem @Writer.register -def _(obj: model.Item, parent_elem): +def _(obj: model.Item, parent): # NB this isn't correct: produces .Codelist instead of .Code - elem = Element(f'str:{obj.__class__.__name__}', - urn=f"{parent_elem.attrib['urn']}.{obj.id}") + elem = Element( + f'str:{obj.__class__.__name__}', + id=obj.id, + urn=sdmx.urn.make(obj, parent), + ) nameable(obj, elem) return elem From 1e92dc70571fc599c0caa60acdf76bcc33e26999 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Tue, 5 May 2020 19:55:38 +0200 Subject: [PATCH 18/18] Extend writer.xml for hierarchical ItemSchemes, Annotations --- sdmx/model.py | 13 ++++-- sdmx/tests/writer/conftest.py | 13 +++++- sdmx/writer/xml.py | 74 +++++++++++++++++++++++++++-------- 3 files changed, 79 insertions(+), 21 deletions(-) diff --git a/sdmx/model.py b/sdmx/model.py 
index 5c95c7a7e..71ca9b4d3 100644 --- a/sdmx/model.py +++ b/sdmx/model.py @@ -141,7 +141,7 @@ def __add__(self, other): result.localizations.update(other.localizations) return result - def localized_default(self, locale): + def localized_default(self, locale=None): """Return the string in *locale*, or else the first defined.""" try: return self.localizations[locale] @@ -311,12 +311,12 @@ def __init__(self, *args, **kwargs): # Add this Item as a child of its parent parent = kwargs.get('parent', None) - if parent and self not in parent.child: - parent.child.append(self) + if parent: + parent.append_child(self) # Add this Item as a parent of its children for c in kwargs.get('child', []): - c.parent = self + self.append_child(c) def __contains__(self, item): """Recursive containment.""" @@ -324,6 +324,11 @@ def __contains__(self, item): if item == c or item in c: return True + def append_child(self, other): + if other not in self.child: + self.child.append(other) + other.parent = self + def get_child(self, id): """Return the child with the given *id*.""" for c in self.child: diff --git a/sdmx/tests/writer/conftest.py b/sdmx/tests/writer/conftest.py index 84ddee643..e415056d4 100644 --- a/sdmx/tests/writer/conftest.py +++ b/sdmx/tests/writer/conftest.py @@ -1,6 +1,6 @@ import pytest from sdmx.message import StructureMessage -from sdmx.model import Agency, Code, Codelist +from sdmx.model import Agency, Annotation, Code, Codelist @pytest.fixture @@ -25,6 +25,17 @@ def codelist(): id='B', name={'en': 'Beginning of period'}, ) + cl.items['B1'] = Code( + id='B1', + name={'en': 'Child code of B'}, + ) + cl.items['B'].append_child(cl.items['B1']) + + cl.items['A'].annotations.append(Annotation( + id='A1', + type='NOTE', + text={'en': 'Text annotation on Code A.'}, + )) return cl diff --git a/sdmx/writer/xml.py b/sdmx/writer/xml.py index 9e9f791fd..1a06f9c3d 100644 --- a/sdmx/writer/xml.py +++ b/sdmx/writer/xml.py @@ -11,7 +11,9 @@ def Element(name, *args, **kwargs): - return 
_element_maker(qname(*name.split(':')), *args, **kwargs) + name = name.split(':') + name = qname(*name) if len(name) == 2 else name[0] + return _element_maker(name, *args, **kwargs) Writer = BaseWriter('XML') @@ -37,20 +39,50 @@ def write(obj, **kwargs): # Utility functions -def nameable(obj, elem): - for locale, label in obj.name.localizations.items(): - child = Element('com:Name', label) +def i11lstring(obj, name): + """InternationalString. + + Returns a list of elements with name `name`. + """ + elems = [] + + for locale, label in obj.localizations.items(): + child = Element(name, label) child.set(qname('xml', 'lang'), locale) - elem.append(child) + elems.append(child) + + return elems -def maintainable(obj): - urn = sdmx.urn.make(obj) - elem = Element(f'str:{obj.__class__.__name__}', urn=urn) - nameable(obj, elem) +def annotable(obj, name, *args, **kwargs): + elem = Element(name, *args, **kwargs) + + if len(obj.annotations): + e_anno = Element('com:Annotations') + e_anno.extend(Writer.recurse(a) for a in obj.annotations) + elem.append(e_anno) + return elem +def identifiable(obj, name, *args, **kwargs): + return annotable(obj, name, *args, id=obj.id, **kwargs) + + +def nameable(obj, name, *args, **kwargs): + elem = identifiable(obj, name, *args, **kwargs) + elem.extend(i11lstring(obj.name, 'com:Name')) + return elem + + +def maintainable(obj, parent=None): + return nameable( + obj, + f'str:{obj.__class__.__name__}', + urn=sdmx.urn.make(obj, parent), + ) + + @Writer.register def _(obj: message.StructureMessage): msg = Element('mes:Structure') @@ -77,11 +109,21 @@ def _(obj: model.ItemScheme): @Writer.register def _(obj: model.Item, parent): - # NB this isn't correct: produces .Codelist instead of .Code - elem = Element( - f'str:{obj.__class__.__name__}', - id=obj.id, - urn=sdmx.urn.make(obj, parent), - ) - nameable(obj, elem) + elem = maintainable(obj, parent=parent) + + if obj.parent: + # Reference to parent code + e_parent = Element('str:Parent') + 
e_parent.append(Element('Ref', id=obj.parent.id)) + elem.append(e_parent) + + return elem + + +@Writer.register +def _(obj: model.Annotation): + elem = Element('com:Annotation') + if obj.id: + elem.attrib['id'] = obj.id + elem.extend(i11lstring(obj.text, 'com:AnnotationText')) return elem