diff --git a/.coveragerc b/.coveragerc deleted file mode 100644 index 07e304d14..000000000 --- a/.coveragerc +++ /dev/null @@ -1,3 +0,0 @@ -[run] -omit = - sdmx/experimental.py diff --git a/.gitignore b/.gitignore index fd2458b11..a3535bb3b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,12 @@ -*.py[cod] __pycache__ - +.eggs *.cache *.egg-info *.pdf +*.py[cod] # Development and build files -.coverage +.coverage* .pytest_cache build coverage.xml diff --git a/doc/api.rst b/doc/api.rst index 2237f8ab1..d609181d7 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -68,41 +68,77 @@ SDMX-JSON :undoc-members: -``writer``: Convert SDMX to pandas objects ------------------------------------------- +``writer``: Convert ``sdmx`` objects to other formats +----------------------------------------------------- + +.. _writer-pandas: + +``writer.pandas``: Convert to ``pandas`` objects +:::::::::::::::::::::::::::::::::::::::::::::::: + +.. currentmodule:: sdmx.writer.pandas + .. versionchanged:: 1.0 - :meth:`sdmx.to_pandas` (via :meth:`write `) - handles all types of objects, replacing the earlier, separate - ``data2pandas`` and ``structure2pd`` writers. + :meth:`sdmx.to_pandas` handles all types of objects, replacing the earlier, separate ``data2pandas`` and ``structure2pd`` writers. -.. automodule:: sdmx.writer - :members: - :exclude-members: write - - .. automethod:: sdmx.writer.write - - .. autosummary:: - write_component - write_datamessage - write_dataset - write_dict - write_dimensiondescriptor - write_itemscheme - write_list - write_membervalue - write_nameableartefact - write_serieskeys - write_structuremessage - -.. autodata:: DEFAULT_RTYPE - :noindex: +:func:`.to_pandas` implements a dispatch pattern according to the type of *obj*. +Some of the internal methods take specific arguments and return varying values. +These arguments can be passed to :meth:`to_pandas` when `obj` is of the appropriate type: + +.. autosummary:: + sdmx.writer.pandas.write_dataset + sdmx.writer.pandas.write_datamessage + sdmx.writer.pandas.write_itemscheme + sdmx.writer.pandas.write_structuremessage + sdmx.writer.pandas.DEFAULT_RTYPE + +Other objects are converted as follows: + +:class:`.Component` + The :attr:`~.Concept.id` attribute of the :attr:`~.Component.concept_identity` is returned. + +:class:`.DataMessage` + The :class:`.DataSet` or data sets within the Message are converted to pandas objects. + Returns: + + - :class:`pandas.Series` or :class:`pandas.DataFrame`, if `obj` has only one data set. + - list of (Series or DataFrame), if `obj` has more than one data set. + +:class:`.dict` + The values of the mapping are converted individually. + If the resulting values are :class:`str` or Series *with indexes that share the same name*, then they are converted to a Series, possibly with a :class:`pandas.MultiIndex`. + Otherwise, a :class:`.DictLike` is returned. + +:class:`.DimensionDescriptor` + The :attr:`~.DimensionDescriptor.components` of the DimensionDescriptor are written. + +:class:`list` + For the following *obj*, returns Series instead of a :class:`list`: + + - a list of :class:`.Observation`: the Observations are written using :meth:`write_dataset`. + - a list with only 1 :class:`.DataSet` (e.g. the :attr:`~.DataMessage.data` attribute of :class:`.DataMessage`): the Series for the single element is returned. + - a list of :class:`.SeriesKey`: the key values (but no data) are returned. + +:class:`.NameableArtefact` + The :attr:`~.NameableArtefact.name` attribute of `obj` is returned. + +.. automodule:: sdmx.writer.pandas + :members: DEFAULT_RTYPE, write_dataset, write_datamessage, write_itemscheme, write_structuremessage .. todo:: Support selection of language for conversion of :class:`InternationalString `. +``writer.xml``: Write to SDMX-ML +:::::::::::::::::::::::::::::::: + +.. versionadded:: 1.1 + +See :func:`.to_xml`. + + ``remote``: Access SDMX REST web services ----------------------------------------- .. autoclass:: sdmx.remote.Session diff --git a/doc/whatsnew.rst b/doc/whatsnew.rst index f94f50f36..7e2194ac3 100644 --- a/doc/whatsnew.rst +++ b/doc/whatsnew.rst @@ -6,7 +6,11 @@ What's new? Next release (vX.Y.0) ===================== -- Test suite improvements +- New features: + + - :pull:`3`: Add :meth:`to_xml` to generate SDMX-ML for a subset of the IM. + +- Test suite: - :pull:`2`: Add tests of data queries for source(s): OECD diff --git a/sdmx/__init__.py b/sdmx/__init__.py index d2ae4542a..b2b974bf5 100644 --- a/sdmx/__init__.py +++ b/sdmx/__init__.py @@ -3,7 +3,7 @@ from sdmx.api import Request, read_sdmx, read_url from sdmx.source import add_source, list_sources from sdmx.util import Resource -from sdmx.writer import write as to_pandas +from sdmx.writer import to_pandas, to_xml import logging __all__ = [ @@ -15,6 +15,7 @@ 'read_sdmx', 'read_url', 'to_pandas', + 'to_xml', ] diff --git a/sdmx/format/xml.py b/sdmx/format/xml.py new file mode 100644 index 000000000..29105a3be --- /dev/null +++ b/sdmx/format/xml.py @@ -0,0 +1,23 @@ +from functools import lru_cache + +from lxml.etree import QName + + +# XML Namespaces +_base_ns = 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1' +NS = { + 'com': f'{_base_ns}/common', + 'data': f'{_base_ns}/data/structurespecific', + 'str': f'{_base_ns}/structure', + 'mes': f'{_base_ns}/message', + 'gen': f'{_base_ns}/data/generic', + 'footer': f'{_base_ns}/message/footer', + 'xml': 'http://www.w3.org/XML/1998/namespace', + 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', + } + + +@lru_cache() +def qname(ns, name): + """Return a fully-qualified tag *name* in namespace *ns*.""" + return QName(NS[ns], name) diff --git a/sdmx/model.py b/sdmx/model.py index bcfefcc7e..71ca9b4d3 100644 --- a/sdmx/model.py +++ b/sdmx/model.py @@ -108,15 +108,15 @@ def __init__(self, value=None, **kwargs): and isinstance(value[0], str)): # 2-tuple of str is (locale, label) value = {value[0]: value[1]} + elif isinstance(value, dict): + # dict; use directly + pass elif isinstance(value, IterableABC): # Iterable of 2-tuples value = {locale: label for (locale, label) in value} elif value is None: # Keyword arguments → dict, possibly empty value = dict(kwargs) - elif isinstance(value, dict): - # dict; use directly - pass else: raise ValueError(value, kwargs) @@ -141,7 +141,7 @@ def __add__(self, other): result.localizations.update(other.localizations) return result - def localized_default(self, locale): + def localized_default(self, locale=None): """Return the string in *locale*, or else the first defined.""" try: return self.localizations[locale] @@ -311,12 +311,12 @@ def __init__(self, *args, **kwargs): # Add this Item as a child of its parent parent = kwargs.get('parent', None) - if parent and self not in parent.child: - parent.child.append(self) + if parent: + parent.append_child(self) # Add this Item as a parent of its children for c in kwargs.get('child', []): - c.parent = self + self.append_child(c) def __contains__(self, item): """Recursive containment.""" @@ -324,6 +324,11 @@ def __contains__(self, item): if item == c or item in c: return True + def append_child(self, other): + if other not in self.child: + self.child.append(other) + other.parent = self + def get_child(self, id): """Return the child with the given *id*.""" for c in self.child: @@ -1621,3 +1626,32 @@ class ProvisionAgreement(MaintainableArtefact, ConstrainableArtefact): structure_usage: StructureUsage = None #: data_provider: DataProvider = None + + +#: The SDMX-IM defines 'packages'; these are used in URNs. +PACKAGE = dict() + +_PACKAGE_CLASS = { + 'base': {Agency, AgencyScheme, DataProvider}, + 'categoryscheme': {Category, Categorisation, CategoryScheme}, + 'codelist': {Code, Codelist}, + 'conceptscheme': {Concept, ConceptScheme}, + 'datastructure': {DataflowDefinition, DataStructureDefinition}, + 'registry': {ContentConstraint, ProvisionAgreement}, + } + +for package, classes in _PACKAGE_CLASS.items(): + PACKAGE.update({cls: package for cls in classes}) + + +def get_class(cls, package=None): + """Return a class object for string *cls* and *package* names.""" + if isinstance(cls, str): + if cls in 'Dataflow DataStructure': + cls += 'Definition' + cls = globals()[cls] + + if package and package != PACKAGE[cls]: + raise ValueError(f'Package {repr(package)} invalid for {cls}') + + return cls diff --git a/sdmx/reader/sdmxml.py b/sdmx/reader/sdmxml.py index eee3fe5fe..876863f4a 100644 --- a/sdmx/reader/sdmxml.py +++ b/sdmx/reader/sdmxml.py @@ -12,6 +12,7 @@ from lxml.etree import QName, XPath from sdmx.exceptions import ParseError, XMLParseError +from sdmx.format.xml import NS, qname from sdmx.message import ( DataMessage, ErrorMessage, Footer, Header, StructureMessage, ) @@ -32,38 +33,12 @@ ) from sdmx.reader import BaseReader +import sdmx.urn log = logging.getLogger(__name__) -# Regular expression for URNs used as references -URN = re.compile(r'urn:sdmx:org\.sdmx\.infomodel' - r'\.(?P[^\.]*)' - r'\.(?P[^=]*)=((?P[^:]*):)?' - r'(?P[^\(\.]*)(\((?P[\d\.]*)\))?' - r'(\.(?P.*))?') - - -# XML namespaces -_base_ns = 'http://www.sdmx.org/resources/sdmxml/schemas/v2_1' -NS = { - 'com': f'{_base_ns}/common', - 'data': f'{_base_ns}/data/structurespecific', - 'str': f'{_base_ns}/structure', - 'mes': f'{_base_ns}/message', - 'gen': f'{_base_ns}/data/generic', - 'footer': f'{_base_ns}/message/footer', - 'xml': 'http://www.w3.org/XML/1998/namespace', - 'xsi': 'http://www.w3.org/2001/XMLSchema-instance', - } - - -def qname(ns, name): - """Return a fully-qualified tag *name* in namespace *ns*.""" - return QName(NS[ns], name) - - _TO_SNAKE_RE = re.compile('([A-Z]+)') @@ -176,30 +151,6 @@ def to_snake(value): } -# Mappings from SDMX-ML 'package' to contained classes -PACKAGE_CLASS = { - 'base': {Agency, AgencyScheme, DataProvider}, - 'categoryscheme': {Category, Categorisation, CategoryScheme}, - 'codelist': {Code, Codelist}, - 'conceptscheme': {Concept, ConceptScheme}, - 'datastructure': {DataflowDefinition, DataStructureDefinition}, - 'registry': {ContentConstraint, ProvisionAgreement}, - } - - -def get_class(package, cls): - """Return a class object for string *cls* and *package* names.""" - if isinstance(cls, str): - if cls in 'Dataflow DataStructure': - cls += 'Definition' - cls = getattr(sdmx.model, cls) - - assert cls in PACKAGE_CLASS[package], \ - f'Package {package!r} invalid for {cls}' - - return cls - - def wrap(value): """Return *value* as a list. @@ -477,8 +428,8 @@ def _maintained(self, cls=None, id=None, urn=None, **kwargs): regular expression. """ if urn: - match = URN.match(urn).groupdict() - cls = get_class(match['package'], match['class']) + match = sdmx.urn.match(urn) + cls = sdmx.model.get_class(match['class'], match['package']) id = match['id'] # Re-add the URN to the kwargs @@ -630,21 +581,21 @@ def parse_ref(self, elem, parent=None): # Determine the class of the ref'd object try: # 'package' and 'class' attributes give the class directly - cls = get_class(attr.pop('package'), attr.pop('class')) + cls = sdmx.model.get_class(attr.pop('class'), attr.pop('package')) except KeyError: # No 'package' and 'class' attributes if parent == 'Parent': # Ref to parent of an Item in an ItemScheme; the ref'd object # has the same class as the Item - cls = getattr(sdmx.model, self._stack[-1]) + cls = sdmx.model.get_class(self._stack[-1]) elif parent in ('AttachmentGroup', 'Group'): cls = GroupDimensionDescriptor elif parent in ('Dimension', 'DimensionReference'): # References to Dimensions cls = [Dimension, TimeDimension] else: - cls = getattr(sdmx.model, parent) + cls = sdmx.model.get_class(parent) # Get or instantiate the object itself try: @@ -779,7 +730,7 @@ def parse_dataset(self, elem): self._current[(DataStructureDefinition, None)] = dsd # DataSet class, e.g. GenericDataSet for root XML tag 'GenericData' - DataSetClass = getattr(sdmx.model, f'{self._stack[0]}Set') + DataSetClass = sdmx.model.get_class(f'{self._stack[0]}Set') # Create the object ds = DataSetClass(structured_by=dsd) @@ -955,7 +906,7 @@ def parse_structures(self, elem): return self._parse(elem, unwrap=False) def parse_organisation(self, elem): - cls = getattr(sdmx.model, QName(elem).localname) + cls = sdmx.model.get_class(QName(elem).localname) o, values = self._named(cls, elem) o.contact = wrap(values.pop('contact', [])) assert len(values) == 0 @@ -1046,12 +997,12 @@ def parse_conceptidentity(self, elem): raise ValueError(values) # URN should refer to a Concept - match = URN.match(values['urn']).groupdict() + match = sdmx.urn.match(values['urn']) if match['class'] != 'Concept': raise ValueError(values['urn']) # Look up the parent ConceptScheme - cls = get_class(match['package'], 'ConceptScheme') + cls = sdmx.model.get_class('ConceptScheme', match['package']) cs = self._maintained(cls=cls, id=match['id']) # Get or create the Concept within *cs* @@ -1068,7 +1019,7 @@ def parse_constraintattachment(self, elem): return result def parse_orgscheme(self, elem): - cls = getattr(sdmx.model, QName(elem).localname) + cls = sdmx.model.get_class(QName(elem).localname) os, values = self._named(cls, elem, unwrap=False) # Get the list of organisations. The following assumes that the # *values* dict has only one item. Otherwise, the returned item will be @@ -1123,7 +1074,7 @@ def parse_componentlist(self, elem): # fixed to 'DimensionDescriptor'." cls_name = QName(elem).localname.replace('List', 'Descriptor') finally: - ComponentListClass = getattr(sdmx.model, cls_name) + ComponentListClass = sdmx.model.get_class(cls_name) cl = ComponentListClass( components=list(chain(*self._parse(elem, unwrap=False).values())), @@ -1141,7 +1092,7 @@ def parse_dimension(self, elem): values = self._parse(elem) # Object class: Dimension, MeasureDimension, or TimeDimension - DimensionClass = getattr(sdmx.model, QName(elem).localname) + DimensionClass = sdmx.model.get_class(QName(elem).localname) args = copy(elem.attrib) try: diff --git a/sdmx/tests/test_reader_json.py b/sdmx/tests/reader/test_json.py similarity index 88% rename from sdmx/tests/test_reader_json.py rename to sdmx/tests/reader/test_json.py index 9c9db7b9e..261fa15a1 100644 --- a/sdmx/tests/test_reader_json.py +++ b/sdmx/tests/reader/test_json.py @@ -1,7 +1,6 @@ -import sdmx import pytest - -from .data import specimen, test_files +import sdmx +from sdmx.tests.data import specimen, test_files @pytest.mark.parametrize('path', **test_files(format='json')) diff --git a/sdmx/tests/test_reader_xml.py b/sdmx/tests/reader/test_reader_xml.py similarity index 98% rename from sdmx/tests/test_reader_xml.py rename to sdmx/tests/reader/test_reader_xml.py index 2c822d186..c7936ceef 100644 --- a/sdmx/tests/test_reader_xml.py +++ b/sdmx/tests/reader/test_reader_xml.py @@ -1,12 +1,11 @@ from lxml.etree import Element +import pytest import sdmx from sdmx.model import ( Facet, FacetType, FacetValueType, ) from sdmx.reader.sdmxml import XMLParseError, Reader -import pytest - -from .data import specimen, test_files +from sdmx.tests.data import specimen, test_files # Read example data files diff --git a/sdmx/tests/test_sources.py b/sdmx/tests/test_sources.py index d35b853c2..b54d3f90f 100644 --- a/sdmx/tests/test_sources.py +++ b/sdmx/tests/test_sources.py @@ -7,10 +7,11 @@ import logging import os +import sdmx +from sdmx import Resource from sdmx.api import Request from sdmx.exceptions import HTTPError from sdmx.source import DataContentType, sources -from sdmx.util import Resource import pytest import requests_mock @@ -129,6 +130,8 @@ def test_endpoints(self, req, endpoint, args): # print(cache, cache.read_text(), result, sep='\n\n') # assert False + sdmx.to_pandas(result) + del result diff --git a/sdmx/tests/writer/conftest.py b/sdmx/tests/writer/conftest.py new file mode 100644 index 000000000..e415056d4 --- /dev/null +++ b/sdmx/tests/writer/conftest.py @@ -0,0 +1,49 @@ +import pytest +from sdmx.message import StructureMessage +from sdmx.model import Agency, Annotation, Code, Codelist + + +@pytest.fixture +def codelist(): + """A Codelist for writer testing.""" + ECB = Agency(id='ECB') + + cl = Codelist( + id='CL_COLLECTION', + version='1.0', + is_final=False, + is_external_reference=False, + maintainer=ECB, + name={'en': 'Collection indicator code list'}, + ) + + cl.items['A'] = Code( + id='A', + name={'en': "Average of observations through period"}, + ) + cl.items['B'] = Code( + id='B', + name={'en': 'Beginning of period'}, + ) + cl.items['B1'] = Code( + id='B1', + name={'en': 'Child code of B'}, + ) + cl.items['B'].append_child(cl.items['B1']) + + cl.items['A'].annotations.append(Annotation( + id='A1', + type='NOTE', + text={'en': 'Text annotation on Code A.'}, + )) + + return cl + + +@pytest.fixture +def structuremessage(codelist): + """A StructureMessage for writer testing.""" + sm = StructureMessage() + sm.codelist[codelist.id] = codelist + + return sm diff --git a/sdmx/tests/test_writer.py b/sdmx/tests/writer/test_pandas.py similarity index 99% rename from sdmx/tests/test_writer.py rename to sdmx/tests/writer/test_pandas.py index dd928aa61..bed608894 100644 --- a/sdmx/tests/test_writer.py +++ b/sdmx/tests/writer/test_pandas.py @@ -6,8 +6,8 @@ import sdmx from sdmx.model import TimeDimension -from . import assert_pd_equal -from .data import expected_data, specimen, test_files +from sdmx.tests import assert_pd_equal +from sdmx.tests.data import expected_data, specimen, test_files # file name → (exception raised, exception message, comment/reason) diff --git a/sdmx/tests/writer/test_protobuf.py b/sdmx/tests/writer/test_protobuf.py new file mode 100644 index 000000000..f2bddee4c --- /dev/null +++ b/sdmx/tests/writer/test_protobuf.py @@ -0,0 +1,21 @@ +import logging + +import pytest +from sdmx.message import StructureMessage +from sdmx.writer.protobuf import write as to_protobuf + + +@pytest.mark.xfail(raises=RuntimeError, + match='sdmx.format.protobuf_pb2 missing') +def test_codelist(caplog, codelist): + msg = StructureMessage() + msg.codelist[codelist.id] = codelist + + caplog.set_level(logging.ERROR) + + result = to_protobuf(msg) + + print(result) + + # No errors logged + assert len(caplog.messages) == 0 diff --git a/sdmx/tests/writer/test_writer_xml.py b/sdmx/tests/writer/test_writer_xml.py new file mode 100644 index 000000000..3f071eb65 --- /dev/null +++ b/sdmx/tests/writer/test_writer_xml.py @@ -0,0 +1,32 @@ +import pytest +import sdmx +from sdmx.message import DataMessage + + +def test_codelist(tmp_path, codelist): + result = sdmx.to_xml(codelist, pretty_print=True) + print(result.decode()) + + +def test_structuremessage(tmp_path, structuremessage): + result = sdmx.to_xml(structuremessage, pretty_print=True) + print(result.decode()) + + # Message can be round-tripped to/from file + path = tmp_path / 'output.xml' + path.write_bytes(result) + msg = sdmx.read_sdmx(path) + + # Contents match the original object + assert ( + msg.codelist['CL_COLLECTION']['A'].name['en'] + == structuremessage.codelist['CL_COLLECTION']['A'].name['en'] + ) + + +def test_not_implemented(): + msg = DataMessage() + + with pytest.raises(NotImplementedError, + match='write DataMessage to XML'): + sdmx.to_xml(msg) diff --git a/sdmx/urn.py b/sdmx/urn.py new file mode 100644 index 000000000..c71351943 --- /dev/null +++ b/sdmx/urn.py @@ -0,0 +1,43 @@ +import re + +from sdmx.model import PACKAGE, MaintainableArtefact + + +# Regular expression for URNs +URN = re.compile(r'urn:sdmx:org\.sdmx\.infomodel' + r'\.(?P[^\.]*)' + r'\.(?P[^=]*)=((?P[^:]*):)?' + r'(?P[^\(\.]*)(\((?P[\d\.]*)\))?' + r'(\.(?P.*))?') + +_BASE = ( + 'urn:sdmx:org.sdmx.infomodel.{package}.{obj.__class__.__name__}=' + '{ma.maintainer.id}:{ma.id}({ma.version}){extra_id}' +) + + +def make(obj, maintainable_parent=None): + """Create an SDMX URN for `obj`. + + If `obj` is not :class:`.MaintainableArtefact`, then `maintainable_parent` + must be supplied in order to construct the URN. + """ + if maintainable_parent: + ma = maintainable_parent + extra_id = f'.{obj.id}' + else: + ma = obj + extra_id = '' + + assert isinstance(ma, MaintainableArtefact) + + return _BASE.format( + package=PACKAGE[obj.__class__], + obj=obj, + ma=ma, + extra_id=extra_id, + ) + + +def match(string): + return URN.match(string).groupdict() diff --git a/sdmx/writer/__init__.py b/sdmx/writer/__init__.py new file mode 100644 index 000000000..72c1e449f --- /dev/null +++ b/sdmx/writer/__init__.py @@ -0,0 +1,7 @@ +from .pandas import write as to_pandas +from .xml import write as to_xml + +__all__ = [ + 'to_pandas', + 'to_xml', +] diff --git a/sdmx/writer/base.py b/sdmx/writer/base.py new file mode 100644 index 000000000..61c0534db --- /dev/null +++ b/sdmx/writer/base.py @@ -0,0 +1,66 @@ +from functools import singledispatch + + +class BaseWriter: + """Base class for recursive writers. + + Usage: + + - Create an instance of this class. + - Use :meth:`register` in the same manner as Python's built-in + :func:`functools.singledispatch` to decorate functions that certain types + of :mod:`sdmx.model` or :mod:`sdmx.message` objects. + - Call :meth:`recurse` to kick off recursive writing of objects, including + from inside other functions. + + Example + ------- + MyWriter = BaseWriter('my') + + @MyWriter.register + def _(obj: sdmx.model.ItemScheme): + ... code to write an ItemScheme ... + return result + + @MyWriter.register + def _(obj: sdmx.model.Codelist): + ... code to write a Codelist ... + return result + """ + def __init__(self, format_name): + # Create the single-dispatch function + @singledispatch + def func(obj, *args, **kwargs): + raise NotImplementedError(f'write {obj.__class__.__name__} to ' + f'{format_name}') + + self._dispatcher = func + + def recurse(self, obj, *args, **kwargs): + """Recursively write *obj*. + + If there is no :meth:`register` 'ed function to write the class of + `obj`, then the parent class of `obj` is used to find a method. + """ + # TODO use a cache to speed up the MRO does not need to be traversed + # for every object instance + + dispatcher = getattr(self, '_dispatcher') + try: + # Let the single dispatch function choose the overload + return dispatcher(obj, *args, **kwargs) + except NotImplementedError as exc: + try: + # Use the object's parent class to get a different overload + func = dispatcher.registry[obj.__class__.mro()[1]] + except KeyError: + # Overload for the parent class did not exist + raise exc + + return func(obj, *args, **kwargs) + + def register(self, func): + """Register *func* as a writer for a particular object type.""" + dispatcher = getattr(self, '_dispatcher') + dispatcher.register(func) + return func diff --git a/sdmx/writer.py b/sdmx/writer/pandas.py similarity index 83% rename from sdmx/writer.py rename to sdmx/writer/pandas.py index dcc3596fa..a98d946ed 100644 --- a/sdmx/writer.py +++ b/sdmx/writer/pandas.py @@ -3,28 +3,19 @@ import numpy as np import pandas as pd -from sdmx import model +from sdmx import message, model from sdmx.model import ( DEFAULT_LOCALE, - AgencyScheme, AllDimensions, DataAttribute, - DataflowDefinition, - DataStructureDefinition, DataSet, Dimension, DimensionComponent, - # DimensionDescriptor, - CategoryScheme, - Codelist, - Component, - ConceptScheme, - ItemScheme, - NameableArtefact, Observation, SeriesKey, TimeDimension, ) +from sdmx.writer.base import BaseWriter from sdmx.util import DictLike @@ -33,51 +24,21 @@ DEFAULT_RTYPE = 'rows' -# Class → common write_*() methods -_ALIAS = { - DictLike: dict, - AgencyScheme: ItemScheme, - CategoryScheme: ItemScheme, - ConceptScheme: ItemScheme, - Codelist: ItemScheme, - DataflowDefinition: NameableArtefact, - DataStructureDefinition: NameableArtefact, - Dimension: Component, - TimeDimension: Component, - model.GenericDataSet: DataSet, - model.GenericTimeSeriesDataSet: DataSet, - model.StructureSpecificDataSet: DataSet, - model.StructureSpecificTimeSeriesDataSet: DataSet, -} +Writer = BaseWriter('pandas') def write(obj, *args, **kwargs): """Convert an SDMX *obj* to :mod:`pandas` object(s). - Implements a dispatch pattern according to the type of *obj*. For instance, - a :class:`.DataSet` object is converted using :func:`.write_dataset`. See - individual ``write_*`` methods named for more information on their - behaviour, including accepted *args* and *kwargs*. + See :ref:`sdmx.writer.pandas `. """ - cls = obj.__class__ - function = 'write_' + _ALIAS.get(cls, cls).__name__.lower() - return globals()[function](obj, *args, **kwargs) + return Writer.recurse(obj, *args, **kwargs) # Functions for Python containers -def write_list(obj, *args, **kwargs): - """Convert a :class:`list` of SDMX objects. - - For the following *obj*, :meth:`write_list` returns :class:`pandas.Series` - instead of a :class:`list`: - - - a list of :class:`.Observation`: the Observations are written using - :meth:`write_dataset`. - - a list with only 1 :class:`.DataSet` (e.g. the - :attr:`~.DataMessage.data>` attribute of :class:`.DataMessage`): the - Series for the single element is returned. - - a list of :class:`.SeriesKey`: the key values (but no data) are returned. - """ +@Writer.register +def _(obj: list, *args, **kwargs): + """Convert a :class:`list` of SDMX objects.""" if isinstance(obj[0], Observation): return write_dataset(obj, *args, **kwargs) elif isinstance(obj[0], DataSet) and len(obj) == 1: @@ -88,14 +49,9 @@ def write_list(obj, *args, **kwargs): return [write(item, *args, **kwargs) for item in obj] -def write_dict(obj, *args, **kwargs): - """Convert mappings. - - The values of the mapping are write()'d individually. If the resulting - values are :class:`str` or :class:`pd.Series` *with indexes that share the - same name*, then they are converted to a pd.Series, possibly with a - pd.MultiIndex. Otherwise, a DictLike is returned. - """ +@Writer.register +def _(obj: dict, *args, **kwargs): + """Convert mappings.""" result = {k: write(v, *args, **kwargs) for k, v in obj.items()} result_type = set(type(v) for v in result.values()) @@ -120,18 +76,18 @@ def write_dict(obj, *args, **kwargs): raise ValueError(result_type) -def write_set(obj, *args, **kwargs): +@Writer.register +def _(obj: set, *args, **kwargs): """Convert :class:`set`.""" result = {write(o, *args, **kwargs) for o in obj} return result # Functions for message classes -def write_datamessage(obj, *args, rtype=None, **kwargs): +@Writer.register +def write_datamessage(obj: message.DataMessage, *args, rtype=None, **kwargs): """Convert :class:`.DataMessage`. - The data set(s) within the message are converted to pandas objects. - Parameters ---------- rtype : 'compat' or 'rows', optional @@ -162,7 +118,9 @@ def write_datamessage(obj, *args, rtype=None, **kwargs): return [write(ds, *args, **kwargs) for ds in obj.data] -def write_structuremessage(obj, include=None, **kwargs): +@Writer.register +def write_structuremessage(obj: message.StructureMessage, include=None, + **kwargs): """Convert :class:`.StructureMessage`. Parameters @@ -210,16 +168,14 @@ def write_structuremessage(obj, include=None, **kwargs): # Functions for model classes -def write_component(obj): - """Convert :class:`.Component`. - - The :attr:`~.Concept.id` attribute of the - :attr:`~.Component.concept_identity` is returned. - """ +@Writer.register +def _(obj: model.Component): + """Convert :class:`.Component`.""" return str(obj.concept_identity.id) -def write_contentconstraint(obj, **kwargs): +@Writer.register +def _(obj: model.ContentConstraint, **kwargs): """Convert :class:`.ContentConstraint`.""" if len(obj.data_content_region) != 1: raise NotImplementedError @@ -227,7 +183,8 @@ def write_contentconstraint(obj, **kwargs): return write(obj.data_content_region[0], **kwargs) -def write_cuberegion(obj, **kwargs): +@Writer.register +def _(obj: model.CubeRegion, **kwargs): """Convert :class:`.CubeRegion`.""" result = DictLike() for dim, memberselection in obj.member.items(): @@ -236,8 +193,9 @@ def write_cuberegion(obj, **kwargs): return result -def write_dataset(obj, attributes='', dtype=np.float64, constraint=None, - datetime=False, **kwargs): +@Writer.register +def write_dataset(obj: model.DataSet, attributes='', dtype=np.float64, + constraint=None, datetime=False, **kwargs): """Convert :class:`~.DataSet`. See the :ref:`walkthrough ` for examples of using the `datetime` @@ -501,19 +459,20 @@ def _get_attrs(): return df -def write_dimensiondescriptor(obj): - """Convert :class:`.DimensionDescriptor`. - - The :attr:`~.DimensionDescriptor.components` of the DimensionDescriptor - are written. - """ +@Writer.register +def _(obj: model.DimensionDescriptor): + """Convert :class:`.DimensionDescriptor`.""" return write(obj.components) -def write_itemscheme(obj, locale=DEFAULT_LOCALE): +@Writer.register +def write_itemscheme(obj: model.ItemScheme, locale=DEFAULT_LOCALE): """Convert :class:`.ItemScheme`. - Names from *locale* are serialized. + Parameters + ---------- + locale : str, optional + Locale for names to return. Returns ------- @@ -558,21 +517,17 @@ def add_item(item): return result -def write_membervalue(obj): - """Convert :class:`.MemberValue`.""" +@Writer.register +def _(obj: model.MemberValue): return obj.value -def write_nameableartefact(obj): - """Convert :class:`.NameableArtefact`. - - The :attr:`~.NameableArtefact.name` attribute of *obj* is returned. - """ +@Writer.register +def _(obj: model.NameableArtefact): return str(obj.name) def write_serieskeys(obj): - """Convert a list of :class:`.SeriesKey`.""" result = [] for sk in obj: result.append({dim: kv.value for dim, kv in sk.order().values.items()}) diff --git a/sdmx/writer/protobuf.py b/sdmx/writer/protobuf.py new file mode 100644 index 000000000..b99a16168 --- /dev/null +++ b/sdmx/writer/protobuf.py @@ -0,0 +1,59 @@ +import logging + +try: + import sdmx.format.protobuf_pb2 as pb +except ImportError: + pb = None + + +log = logging.getLogger(__name__) + + +def write(obj, *args, **kwargs): + """Convert an SDMX *obj* to protobuf string.""" + if not pb: + raise RuntimeError('sdmx.format.protobuf_pb2 missing') + + return _write(obj, *args, **kwargs).SerializeToString() + + +def _write(obj, *args, **kwargs): + """Helper for :meth:`write`; returns :mod:`protobuf` object(s).""" + cls_name = obj.__class__.__name__ + func_name = f'write_{cls_name.lower()}' + try: + func = globals()[func_name] + except KeyError: + raise NotImplementedError(f'write {cls_name} to protobuf') + else: + return func(obj, *args, **kwargs) + + +def _copy(obj, pb_obj): + """Update the attributes of *pb_obj* from the sdmx.message/.model *obj*.""" + dir_logged = False + + for attr, value in obj.__dict__.items(): + if not value: + continue + + try: + setattr(pb_obj, attr, value) + log.info(f'Set {attr}') + except Exception as exc: + log.error(f'Failed to set {attr}: {exc}') + + if not dir_logged: + fields = filter(lambda n: not n.startswith('_'), dir(pb_obj)) + log.info(sorted(fields)) + dir_logged = True + + +def write_structuremessage(obj, *args, **kwargs): + envelope = pb.Envelope() + + for cl in obj.codelist.values(): + pb_obj = envelope.data.codelists.add() + _copy(cl, pb_obj) + + return envelope diff --git a/sdmx/writer/xml.py b/sdmx/writer/xml.py new file mode 100644 index 000000000..1a06f9c3d --- /dev/null +++ b/sdmx/writer/xml.py @@ -0,0 +1,129 @@ +from lxml import etree +from lxml.builder import ElementMaker + +from sdmx import message, model +from sdmx.format.xml import NS, qname +import sdmx.urn +from sdmx.writer.base import BaseWriter + + +_element_maker = ElementMaker(nsmap=NS) + + +def Element(name, *args, **kwargs): + name = name.split(':') + name = qname(*name) if len(name) == 2 else name[0] + return _element_maker(name, *args, **kwargs) + + +Writer = BaseWriter('XML') + + +def write(obj, **kwargs): + """Convert an SDMX *obj* to SDMX-ML. + + Parameters + ---------- + kwargs + Passed to :meth:`lxml.etree.to_string`, e.g. `pretty_print` = + :obj:`True`. + + Raises + ------ + NotImplementedError + If writing specific objects to SDMX-ML has not been implemented in + :mod:`sdmx`. + """ + return etree.tostring(Writer.recurse(obj), **kwargs) + + +# Utility functions + +def i11lstring(obj, name): + """InternationalString. + + Returns a list of elements with name `name`. + """ + elems = [] + + for locale, label in obj.localizations.items(): + child = Element(name, label) + child.set(qname('xml', 'lang'), locale) + elems.append(child) + + return elems + + +def annotable(obj, name, *args, **kwargs): + elem = Element(name, *args, **kwargs) + + if len(obj.annotations): + e_anno = Element('com:Annotations') + e_anno.extend(Writer.recurse(a) for a in obj.annotations) + elem.append(e_anno) + + return elem + + +def identifiable(obj, name, *args, **kwargs): + return annotable(obj, name, *args, id=obj.id, **kwargs) + + +def nameable(obj, name, *args, **kwargs): + elem = identifiable(obj, name, *args, **kwargs) + elem.extend(i11lstring(obj.name, 'com:Name')) + return elem + + +def maintainable(obj, parent=None): + return nameable( + obj, + f'str:{obj.__class__.__name__}', + urn=sdmx.urn.make(obj, parent), + ) + + +@Writer.register +def _(obj: message.StructureMessage): + msg = Element('mes:Structure') + + # Empty header element + msg.append(Element('mes:Header')) + + structures = Element('mes:Structures') + msg.append(structures) + + codelists = Element('mes:Codelists') + structures.append(codelists) + codelists.extend(Writer.recurse(cl) for cl in obj.codelist.values()) + + return msg + + +@Writer.register +def _(obj: model.ItemScheme): + elem = maintainable(obj) + elem.extend(Writer.recurse(i, parent=obj) for i in obj.items.values()) + return elem + + +@Writer.register +def _(obj: model.Item, parent): + elem = maintainable(obj, parent=parent) + + if obj.parent: + # Reference to parent code + e_parent = Element('str:Parent') + e_parent.append(Element('Ref', id=obj.parent.id)) + elem.append(e_parent) + + return elem + + +@Writer.register +def _(obj: model.Annotation): + elem = Element('com:Annotation') + if obj.id: + elem.attrib['id'] = obj.id + elem.extend(i11lstring(obj.text, 'com:AnnotationText')) + return elem diff --git a/setup.cfg b/setup.cfg index f80bfe46b..c5c2d7d5c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -61,3 +61,9 @@ addopts = sdmx -m "not experimental" markers = experimental: experimental features + +[coverage:run] +omit = + sdmx/experimental.py + sdmx/tests/writer/test_protobuf.py + sdmx/writer/protobuf.py