Skip to content

Commit

Permalink
Merge pull request #38 from octue/release/0.0.12
Browse files Browse the repository at this point in the history
Release/0.0.12
  • Loading branch information
thclark authored Oct 3, 2020
2 parents 7895bd0 + ce304f0 commit c3874db
Show file tree
Hide file tree
Showing 10 changed files with 176 additions and 23 deletions.
2 changes: 1 addition & 1 deletion examples/damage_classifier_service/twine.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
// Manifest strands contain lists, with one entry for each required dataset
"configuration_manifest_filters": [
"configuration_manifest": [
{
// Once the inputs are validated, your analysis program can use this key to access the dataset
"key": "trained_model",
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

setup(
name="twined",
version="0.0.12-alpha",
version="0.0.12",
py_modules=[],
install_requires=["jsonschema ~= 3.2.0", "python-dotenv"],
url="https://www.github.com/octue/twined",
Expand Down
2 changes: 1 addition & 1 deletion tests/test_schema_strands.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def test_invalid_strand(self):
twine = Twine(source=twine_file)
values_file = self.path + "values/configurations/configuration_valid.json"
data = twine._load_json("configuration", source=values_file)
with self.assertRaises(exceptions.TwineTypeException):
with self.assertRaises(exceptions.UnknownStrand):
twine._validate_against_schema("not_a_strand_name", data)

def test_missing_values_files(self):
Expand Down
5 changes: 4 additions & 1 deletion tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ def test_encoder_without_numpy(self):
with self.assertRaises(TypeError) as e:
json.dumps(some_json, cls=TwinedEncoder)

self.assertEqual("Object of type 'ndarray' is not JSON serializable", e.exception.args[0])
# Very annoying behaviour change between python 3.6 and 3.8
py38 = "Object of type 'ndarray' is not JSON serializable" in e.exception.args[0]
py36 = "Object of type ndarray is not JSON serializable" in e.exception.args[0]
self.assertTrue(py36 or py38)

def test_encoder_with_numpy(self):
""" Ensures that the json encoder can work with numpy installed
Expand Down
10 changes: 9 additions & 1 deletion twined/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
from . import exceptions # noqa: F401
from . import utils # noqa: F401
from .twine import Twine # noqa: F401
from .twine import ( # noqa: F401
ALL_STRANDS,
CHILDREN_STRANDS,
CREDENTIAL_STRANDS,
MANIFEST_STRANDS,
SCHEMA_STRANDS,
Twine,
)
10 changes: 10 additions & 0 deletions twined/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ class NotImplementedYet(TwineException):
"""


class TwineValueException(TwineException, ValueError):
    """ Raised when a python ValueError is appropriate. Inheriting from both bases ensures the error still also
    inherits from TwineException, so it can be caught either as a ValueError or as a TwineException.
    """


class TwineTypeException(TwineException, TypeError):
    """ Raised when a python TypeError is appropriate. Inheriting from both bases ensures the error still also
    inherits from TwineException, so it can be caught either as a TypeError or as a TwineException.
    """
Expand Down Expand Up @@ -42,6 +47,11 @@ class InvalidTwineContents(InvalidTwine, ValidationError):
# --------------------- Exceptions relating to accessing/setting strands ------------------------


class UnknownStrand(TwineException, ValueError):
    """ Raised when referencing a strand which is not defined in ALL_STRANDS. Inherits from both TwineException
    and ValueError, so it can be caught as either.
    """


class StrandNotFound(TwineException, KeyError):
    """ Raised when attempting to access a strand which is not present in the twine. Inherits from both
    TwineException and KeyError, so it can be caught as either.
    """
Expand Down
2 changes: 1 addition & 1 deletion twined/schema/manifest_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
},
"sequence": {
"description": "The ordering on the file, if any, within its group/cluster",
"type": "integer"
"type": ["integer", "null"]
},
"cluster": {
"description": "The group, or cluster, to which the file belongs",
Expand Down
155 changes: 140 additions & 15 deletions twined/twine.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from jsonschema import ValidationError, validate as jsonschema_validate

from . import exceptions
from .utils import load_json
from .utils import load_json, trim_suffix


logger = logging.getLogger(__name__)
Expand All @@ -28,25 +28,35 @@

# Each *_STRANDS constant is a tuple of strand names belonging to one category
CHILDREN_STRANDS = ("children",)

MONITOR_STRANDS = ("monitors",)

# Flat tuple of every known strand name, built by unpacking the per-category tuples in order. Used for
# membership checks on strand names and quoted in the "Unknown strand" error message.
ALL_STRANDS = (
    *SCHEMA_STRANDS,
    *MANIFEST_STRANDS,
    *CREDENTIAL_STRANDS,
    *CHILDREN_STRANDS,
    *MONITOR_STRANDS,
)


class Twine:
def __init__(self, **kwargs):
""" Instantiate a twine class, providing a file name or a utf-8 encoded string containing valid json.
The twine is itself validated to be correct against the twine schema.
""" Twine class manages validation of inputs and outputs to/from a data service, based on spec in a 'twine' file.
Instantiate a Twine by providing a file name or a utf-8 encoded string containing valid json.
The twine is itself validated to be correct on instantiation of Twine().
Note: Instantiating the twine does not validate that any inputs to an application are correct - it merely
checks that the twine itself is correct.
Note: Instantiating the twine does not validate that any inputs to an application are correct - it merely
checks that the twine itself is correct.
"""

def __init__(self, **kwargs):
    """ Constructor for the twine class: loads the twine from the given keyword arguments (forwarded to
    _load_twine), then records the names of the strands found in it.
    """
    self._load_twine(**kwargs)

    # Top-level keys of the raw twine, with any "_schema" suffix removed so that e.g. a key "x_schema" is
    # recorded under the name "x". Stored as a tuple and exposed through the available_strands property.
    strand_names = (trim_suffix(key, "_schema") for key in self._raw.keys())
    self._available_strands = tuple(strand_names)

def _load_twine(self, source=None):
""" Load twine from a *.json filename, file-like or a json string and validates twine contents
"""
Expand Down Expand Up @@ -106,7 +116,7 @@ def _validate_against_schema(self, strand, data):

else:
if strand not in SCHEMA_STRANDS:
raise exceptions.TwineTypeException(f"Unknown strand {strand}. Try one of {ALL_STRANDS}.")
raise exceptions.UnknownStrand(f"Unknown strand {strand}. Try one of {ALL_STRANDS}.")
schema_key = strand + "_schema"
try:
schema = self._raw[schema_key]
Expand All @@ -133,26 +143,46 @@ def _validate_twine_version(self):
f"Twined library version conflict. Twine file requires {twine_file_twined_version} but you have {installed_twined_version} installed"
)

def _validate_values(self, kind, source, values_class=None, **kwargs):
def _validate_values(self, kind, source, cls=None, **kwargs):
""" Common values validator method
"""
data = self._load_json(kind, source, **kwargs)
self._validate_against_schema(kind, data)
if values_class:
# TODO create a values object from the data
pass
if cls:
return cls(**data)
return data

def _validate_manifest(self, kind, source, manifest_class=None, **kwargs):
def _validate_manifest(self, kind, source, cls=None, **kwargs):
""" Common manifest validator method
"""
data = self._load_json(kind, source, **kwargs)

# TODO elegant way of cleaning up this nasty serialisation hack to manage conversion of outbound manifests to primitive
inbound = True
if hasattr(data, "serialise"):
inbound = False
data = data.serialise()

self._validate_against_schema(kind, data)
if manifest_class:
# TODO create a manifest object and verify that all the required keys etc are there
pass

if cls and inbound:
# TODO verify that all the required keys etc are there
return cls(**data)

return data

@property
def available_strands(self):
    """ Names of the strands found in this twine, returned as the tuple stored on `_available_strands`
    """
    return self._available_strands

@available_strands.setter
def available_strands(self, value):
    """ Rejects any assignment, so that available_strands behaves as a read-only attribute

    :raises exceptions.TwineValueException: always, whatever `value` is given
    """
    message = "Attribute available_strands is read only."
    raise exceptions.TwineValueException(message)

def validate_children(self, **kwargs):
""" Validates that the children values, passed as either a file or a json string, are correct
"""
Expand Down Expand Up @@ -262,3 +292,98 @@ def validate_output_manifest(self, source, **kwargs):
""" Validates the output manifest, passed as either a file or a json string
"""
return self._validate_manifest("output_manifest", source, **kwargs)

@staticmethod
def _get_cls(name, cls):
    """ Resolve the class to use for a strand

    :parameter name: strand name, used as the lookup key when `cls` is a dict
    :parameter cls: either a dict of classes keyed on strand name, or a single value used for every strand
    :return: `cls[name]` (None if the key is absent) when `cls` is a dict, otherwise `cls` itself
    """
    if not isinstance(cls, dict):
        return cls
    return cls.get(name, None)

def validate(self, allow_missing=False, allow_extra=False, cls=None, **kwargs):
    """ Validate each strand whose source is supplied as a keyword argument

    Usage:
    ```
    self.twine.validate(
        input_values=input_values,
        input_manifest=input_manifest,
        credentials=credentials,
        children=children,
        cls=CLASS_MAP,
        allow_missing=False,
        allow_extra=False,
    )
    ```

    :parameter allow_missing: If a strand is present in the twine, but its source is equal to None, allow
    validation to continue.
    :type allow_missing: bool
    :parameter allow_extra: If a strand is present in the sources, but not in the twine, allow validation to
    continue (only strands in the twine will be validated and converted, others will be returned as-is)
    :type allow_extra: bool
    :parameter cls: optional dict of classes keyed on strand name (alternatively, one single class which will be
    applied to strands) which will be instantiated with the validated source data.
    :type cls: dict or any
    :parameter kwargs: sources keyed on strand name; any keyword which is not a name in ALL_STRANDS is forwarded
    unchanged to the individual validate_<strand_name> methods
    :return: dict of validated and initialised sources
    :rtype: dict
    :raises exceptions.StrandNotFound: if allow_extra is False and data is given for a strand not in the twine
    :raises exceptions.TwineValueException: if allow_missing is False and a strand in the twine has a None source
    """

    # Move the strand name:source pairs out of kwargs and into their own dict. What remains in kwargs is passed
    # on to each per-strand validator.
    strand_names = [key for key in kwargs if key in ALL_STRANDS]
    sources = {key: kwargs.pop(key) for key in strand_names}

    for strand_name in strand_names:
        strand_data = sources[strand_name]

        if (not allow_extra) and (strand_data is not None) and (strand_name not in self.available_strands):
            raise exceptions.StrandNotFound(
                f"Source data is provided for '{strand_name}' but no such strand is defined in the twine"
            )

        if (not allow_missing) and (strand_name in self.available_strands) and (strand_data is None):
            raise exceptions.TwineValueException(
                f"The '{strand_name}' strand is defined in the twine, but no data is provided in sources"
            )

        if strand_data is None:
            # Nothing to validate; the None entry is returned as-is
            continue

        # TODO Consider reintroducing a skip based on whether the source is already an instance of cls. For
        #  now, it is the responsibility of the caller to determine what has already been validated.
        validator = getattr(self, f"validate_{strand_name}")
        klass = self._get_cls(strand_name, cls)
        sources[strand_name] = validator(strand_data, cls=klass, **kwargs)

    return sources

def validate_strand(self, name, source, **kwargs):
    """ Validates a single strand by name

    :parameter name: name of the strand to validate, e.g. "input_values"
    :type name: str
    :parameter source: source data for the strand, handed to validate() under the keyword given by `name`
    :parameter kwargs: forwarded unchanged to validate() (e.g. allow_missing, allow_extra, cls)
    :return: the entry for `name` in the dict returned by validate()
    """
    # validate() picks strand sources out of its keyword arguments, so the name/source pair must be unpacked
    # as a keyword. Passed positionally, the dict would bind to `allow_missing` and no strand would be found.
    return self.validate(**{name: source}, **kwargs)[name]

def prepare(self, *args, cls=None, **kwargs):
    """ Prepares an instance for each named strand, using a class map

    :parameter args: names of the strands to prepare; each must be a name in ALL_STRANDS
    :parameter cls: optional dict of classes keyed on strand name, or a single class applied to every strand
    :parameter kwargs: keyword arguments used to instantiate the class (or a plain dict when no class applies)
    :return: dict keyed on strand name; the value is None for strands which are not in this twine
    :rtype: dict
    :raises exceptions.UnknownStrand: if a requested name is not in ALL_STRANDS
    """
    prepared = {}
    for strand in args:
        if strand not in ALL_STRANDS:
            raise exceptions.UnknownStrand(f"Unknown strand '{strand}'")

        if strand not in self.available_strands:
            prepared[strand] = None
            continue

        klass = self._get_cls(strand, cls)
        instance = klass(**kwargs) if klass else dict(**kwargs)

        # Objects exposing a prepare() hook are replaced by whatever that hook returns, given the raw twine
        # entry stored under the strand name
        if hasattr(instance, "prepare"):
            instance = instance.prepare(self._raw[strand])
        prepared[strand] = instance

    return prepared
return prepared
5 changes: 3 additions & 2 deletions twined/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .encoders import TwinedEncoder # noqa:F401
from .load_json import load_json # noqa:F401
from .encoders import TwinedEncoder # noqa: F401
from .load_json import load_json # noqa: F401
from .strings import trim_suffix # noqa: F401
6 changes: 6 additions & 0 deletions twined/utils/strings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def trim_suffix(text, suffix):
    """ Remove `suffix` from the end of `text` when it is present; otherwise hand `text` back unchanged

    :parameter text: the string to trim
    :parameter suffix: the ending to remove
    :return: `text` without the trailing `suffix`
    """
    if text.endswith(suffix):
        # Slice on an explicit end index (not -len(suffix)) so that an empty suffix leaves text intact
        cut = len(text) - len(suffix)
        return text[:cut]
    return text

0 comments on commit c3874db

Please sign in to comment.