Skip to content

Commit

Permalink
MRG: Merge pull request #118 from octue/allow-optional-strands
Browse files Browse the repository at this point in the history
Allow optional strands
  • Loading branch information
cortadocodes authored Dec 13, 2024
2 parents a7d7e8d + eb3c74f commit e652b25
Show file tree
Hide file tree
Showing 10 changed files with 228 additions and 421 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:

steps:
- name: Checkout repository
uses: actions/checkout@v3
uses: actions/checkout@v4

# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/python-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ jobs:
if: "!contains(github.event.head_commit.message, 'skipci')"
runs-on: ubuntu-latest
env:
USING_COVERAGE: '3.9'
USING_COVERAGE: '3.11'
strategy:
matrix:
python: ['3.8', '3.9']
python: ['3.8', '3.9', '3.10', '3.11']
steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v5
Expand Down Expand Up @@ -60,7 +60,7 @@ jobs:
contents: read
steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Install Poetry
uses: snok/[email protected]
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ jobs:
if: "github.event.pull_request.merged == true"
runs-on: ubuntu-latest
env:
USING_COVERAGE: '3.9'
USING_COVERAGE: '3.11'
strategy:
matrix:
python: ['3.8', '3.9']
python: ['3.8', '3.9', '3.10', '3.11']
steps:
- name: Checkout Repository
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v5
Expand Down Expand Up @@ -48,7 +48,7 @@ jobs:
needs: run-tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Install Poetry
uses: snok/[email protected]
Expand Down
501 changes: 109 additions & 392 deletions poetry.lock

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "twined"
version = "0.5.6"
version = "0.6.0"
repository = "https://www.github.com/octue/twined"
description = "A library to help digital twins and data services talk to one another."
authors = [
Expand All @@ -27,8 +27,6 @@ jsonschema = "^4"
python-dotenv = ">=0,<=2"

[tool.poetry.group.dev.dependencies]
flake8 = "3.8.3"
black = "19.10.b0"
pre-commit = ">=2.6.0"
coverage = ">=5.2.1"
numpy = "^1"
Expand Down
5 changes: 5 additions & 0 deletions tests/test_manifest_strands.py
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,8 @@ def test_error_raised_if_multiple_datasets_have_same_name(self):

with self.assertRaises(KeyError):
twine.validate_input_manifest(source=input_manifest)

def test_missing_optional_manifest_does_not_raise_error(self):
"""Test that not providing an optional strand doesn't result in a validation error."""
twine = Twine(source={"output_manifest": {"datasets": {}, "optional": True}})
twine.validate(output_manifest=None)
5 changes: 5 additions & 0 deletions tests/test_schema_strands.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,11 @@ def test_valid_with_extra_values(self):

Twine(source=VALID_SCHEMA_TWINE).validate_configuration_values(source=configuration_valid_with_extra_field)

def test_missing_optional_values_do_not_raise_error(self):
"""Test that not providing an optional strand doesn't result in a validation error."""
twine = Twine(source={"configuration_values_schema": {"optional": True}})
twine.validate(configuration_values=None)


if __name__ == "__main__":
unittest.main()
34 changes: 34 additions & 0 deletions tests/test_twine.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,37 @@ def test_available_strands_properties(self):
)

self.assertEqual(twine.available_manifest_strands, {"output_manifest"})

def test_required_strands_property(self):
"""Test that the required strands property is correct."""
twines = [
{
"configuration_values_schema": {},
"input_values_schema": {},
"output_values_schema": {},
"output_manifest": {"datasets": {}},
},
{
"configuration_values_schema": {"optional": True},
"input_values_schema": {},
"output_values_schema": {},
"output_manifest": {"datasets": {}, "optional": True},
},
{
"configuration_values_schema": {"optional": False},
"input_values_schema": {},
"output_values_schema": {},
"output_manifest": {"datasets": {}, "optional": False},
},
]

expected_required_strands = [
{"configuration_values", "input_values", "output_values", "output_manifest"},
{"input_values", "output_values"},
{"configuration_values", "input_values", "output_values", "output_manifest"},
]

for twine, expected in zip(twines, expected_required_strands):
with self.subTest(twine=twine):
twine = Twine(source=twine)
self.assertEqual(twine.required_strands, expected)
48 changes: 42 additions & 6 deletions twined/schema/twine_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@
"manifest": {
"type": "object",
"properties": {
"optional": {
"type": "boolean",
"description": "This should be `true` if the manifest is optional."
},
"datasets": {
"type": "object",
"description": "A list of entries, each describing a dataset that should be attached to / made available to the digital twin",
Expand Down Expand Up @@ -92,10 +96,20 @@
]
}
},
"configuration_manifest": {"$ref": "#/$defs/manifest"},
"configuration_manifest": {
"$ref": "#/$defs/manifest"
},
"configuration_values_schema": {
"type": "object",
"required": ["properties"]
"properties": {
"properties": {
"type": "object"
},
"optional": {
"type": "boolean",
"description": "This should be `true` if the configuration values are optional."
}
}
},
"credentials": {
"type": "array",
Expand All @@ -118,13 +132,35 @@
"additionalProperties": false
}
},
"input_manifest": {"$ref": "#/$defs/manifest"},
"input_manifest": {
"$ref": "#/$defs/manifest"
},
"input_values_schema": {
"type": "object"
"type": "object",
"properties": {
"properties": {
"type": "object"
},
"optional": {
"type": "boolean",
"description": "This should be `true` if the input values are optional."
}
}
},
"output_manifest": {
"$ref": "#/$defs/manifest"
},
"output_manifest": {"$ref": "#/$defs/manifest"},
"output_values_schema": {
"type": "object"
"type": "object",
"properties": {
"properties": {
"type": "object"
},
"optional": {
"type": "boolean",
"description": "This should be `true` if the output values are optional."
}
}
}
}
}
34 changes: 23 additions & 11 deletions twined/twine.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,16 @@ class Twine:
"""

def __init__(self, **kwargs):
self._available_strands = set()
self._required_strands = set()

for name, strand in self._load_twine(**kwargs).items():
setattr(self, name, strand)
self._available_strands.add(trim_suffix(name, "_schema"))

if isinstance(strand, dict) and not strand.get("optional", False):
self._required_strands.add(trim_suffix(name, "_schema"))

self._available_strands = set(trim_suffix(name, "_schema") for name in vars(self))
self._available_manifest_strands = self._available_strands & set(MANIFEST_STRANDS)

def _load_twine(self, source=None):
Expand Down Expand Up @@ -216,18 +222,26 @@ def _validate_all_expected_datasets_are_present_in_manifest(self, manifest_kind,
def available_strands(self):
"""Get the names of strands that are found in this twine.
:return set:
:return set(str):
"""
return self._available_strands

@property
def available_manifest_strands(self):
"""Get the names of the manifest strands that are found in this twine.
:return set:
:return set(str):
"""
return self._available_manifest_strands

@property
def required_strands(self):
"""Get the names of strands that are required in this twine.
:return set(str):
"""
return self._required_strands

def validate_children(self, source, **kwargs):
"""Validate that the children values, passed as either a file or a json string, are correct."""
# TODO cache this loaded data keyed on a hashed version of kwargs
Expand Down Expand Up @@ -339,7 +353,7 @@ def _get_cls(name, cls):
"""Getter that will return cls[name] if cls is a dict or cls otherwise"""
return cls.get(name, None) if isinstance(cls, dict) else cls

def validate(self, allow_missing=False, allow_extra=False, cls=None, **kwargs):
def validate(self, allow_extra=False, cls=None, **kwargs):
"""Validate strands from sources provided as keyword arguments
Usage:
Expand All @@ -350,31 +364,29 @@ def validate(self, allow_missing=False, allow_extra=False, cls=None, **kwargs):
credentials=credentials,
children=children,
cls=CLASS_MAP,
allow_missing=False,
allow_extra=False,
)
```
:param bool allow_missing: If strand is present in the twine, but the source is equal to None, allow validation to continue.
:param bool allow_extra: If strand is present in the sources, but not in the twine, allow validation to continue (only strands in the twine will be validated and converted, others will be returned as-is)
:param any cls: optional dict of classes keyed on strand name (alternatively, one single class which will be applied to strands) which will be instantiated with the validated source data.
:return dict: dict of validated and initialised sources
"""
# pop any strand name:data pairs out of kwargs and into their own dict
source_kwargs = tuple(name for name in kwargs.keys() if name in ALL_STRANDS)
sources = dict((name, kwargs.pop(name)) for name in source_kwargs)

for strand_name, strand_data in sources.items():
if not allow_extra:
if (strand_data is not None) and (strand_name not in self.available_strands):
raise exceptions.StrandNotFound(
f"Source data is provided for '{strand_name}' but no such strand is defined in the twine"
)

if not allow_missing:
if (strand_name in self.available_strands) and (strand_data is None):
raise exceptions.TwineValueException(
f"The '{strand_name}' strand is defined in the twine, but no data is provided in sources"
)
if (strand_name in self.required_strands) and (strand_data is None):
raise exceptions.TwineValueException(
f"The '{strand_name}' strand is defined in the twine, but no data is provided in sources"
)

if strand_data is not None:
# TODO Consider reintroducing a skip based on whether cls is already instantiated. For now, leave it the
Expand Down

0 comments on commit e652b25

Please sign in to comment.