Skip to content

Commit

Permalink
IMP Added basic validation and testing for manifests
Browse files Browse the repository at this point in the history
  • Loading branch information
thclark committed Jun 23, 2020
1 parent 74f867e commit ac16299
Show file tree
Hide file tree
Showing 5 changed files with 215 additions and 1 deletion.
38 changes: 38 additions & 0 deletions tests/data/manifests/inputs/input_valid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
"datasets": [
{
"id": "7ead7669-8162-4f64-8cd5-4abe92509e17",
"name": "my meteorological dataset",
"tags": "met, mast, wind",
"files": [
{
"path": "input/datasets/7ead7669/file_1.csv",
"cluster": 0,
"sequence": 0,
"extension": "csv",
"tags": "",
"posix_timestamp": 0,
"id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
"last_modified": "2019-02-28T22:40:30.533005Z",
"name": "file_1.csv",
"size_bytes": 59684813,
"sha-512/256": "somesha"
},
{
"path": "input/datasets/7ead7669/file_2.csv",
"cluster": 0,
"sequence": 1,
"extension": "csv",
"tags": "",
"posix_timestamp": 0,
"id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45",
"last_modified": "2019-02-28T22:40:40.633001Z",
"name": "file_2.csv",
"size_bytes": 59684813,
"sha-512/256": "someothersha"
}
]
}
]
}
38 changes: 38 additions & 0 deletions tests/data/manifests/outputs/output_valid.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"id": "8ead7669-8162-4f64-8cd5-4abe92509e17",
"datasets": [
{
"id": "7ead7669-8162-4f64-8cd5-4abe92509e17",
"name": "my meteorological dataset",
"tags": "met, mast, wind",
"files": [
{
"path": "input/datasets/7ead7669/file_1.csv",
"cluster": 0,
"sequence": 0,
"extension": "csv",
"tags": "",
"posix_timestamp": 0,
"id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86",
"last_modified": "2019-02-28T22:40:30.533005Z",
"name": "file_1.csv",
"size_bytes": 59684813,
"sha-512/256": "somesha"
},
{
"path": "input/datasets/7ead7669/file_2.csv",
"cluster": 0,
"sequence": 1,
"extension": "csv",
"tags": "",
"posix_timestamp": 0,
"id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45",
"last_modified": "2019-02-28T22:40:40.633001Z",
"name": "file_2.csv",
"size_bytes": 59684813,
"sha-512/256": "someothersha"
}
]
}
]
}
14 changes: 14 additions & 0 deletions tests/data/twines/valid_manifest_twine.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{
"input_manifest": [
{
"key": "met_mast_data",
"purpose": "A dataset containing meteorological mast data",
"filters": "tags:(met* AND mast AND location) files:(extension:csv AND sequence:>=0) location:10"
},
{
"key": "scada_data",
"purpose": "A dataset containing scada data",
"filters": "tags:(met* AND mast) files:(extension:csv AND sequence:>=0) location:10"
}
]
}
110 changes: 110 additions & 0 deletions tests/test_manifest_strands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import unittest

from twined import Twine, exceptions

from .base import BaseTestCase


class TestManifestStrands(BaseTestCase):
    """Exercise the Twine class when validating data for strands which require manifests."""

    def test_cannot_load_with_no_file_or_json(self):
        """Validating a manifest without passing any source raises TwineTypeException."""
        manifest_twine = Twine(file=self.path + "twines/valid_manifest_twine.json")

        # Neither validator is given any arguments, so each one should refuse to proceed.
        with self.assertRaises(exceptions.TwineTypeException):
            manifest_twine.validate_input_manifest()

        with self.assertRaises(exceptions.TwineTypeException):
            manifest_twine.validate_output_manifest()

    def test_missing_manifest_files(self):
        """Pointing a validator at a nonexistent file raises the strand-specific 'not found' exception."""
        manifest_twine = Twine(file=self.path + "twines/valid_manifest_twine.json")
        missing_path = self.path + "not_a_file.json"

        # The same missing path is used for both strands; each reports its own exception type.
        with self.assertRaises(exceptions.InputManifestFileNotFound):
            manifest_twine.validate_input_manifest(file=missing_path)

        with self.assertRaises(exceptions.OutputManifestFileNotFound):
            manifest_twine.validate_output_manifest(file=missing_path)

    def test_valid_manifest_files(self):
        """Valid input and output manifest files validate without raising."""
        manifest_twine = Twine(file=self.path + "twines/valid_manifest_twine.json")

        input_manifest_path = self.path + "manifests/inputs/input_valid.json"
        manifest_twine.validate_input_manifest(file=input_manifest_path)

        output_manifest_path = self.path + "manifests/outputs/output_valid.json"
        manifest_twine.validate_output_manifest(file=output_manifest_path)

# def test_empty_values(self):
# """ Ensures that appropriate errors are generated for invalid values
# """
# twine_file = self.path + "twines/valid_schema_twine.json"
# twine = Twine(file=twine_file)
# values_file = self.path + "configurations/empty.json"
# with self.assertRaises(exceptions.InvalidValuesJson):
# twine.validate_configuration(file=values_file)
#
# def test_incorrect_values(self):
# """ Ensures that appropriate errors are generated for invalid values
# """
# twine_file = self.path + "twines/valid_schema_twine.json"
# twine = Twine(file=twine_file)
# values_file = self.path + "configurations/incorrect.json"
# with self.assertRaises(exceptions.InvalidValuesContents):
# twine.validate_configuration(file=values_file)
#
# def test_missing_not_required_values(self):
# """ Ensures that no errors are generated when values which are not required are missing
# """
# twine_file = self.path + "twines/valid_schema_twine.json"
# twine = Twine(file=twine_file)
# values_file = self.path + "outputs/missing_not_required.json"
# twine.validate_output_values(file=values_file)
#
# def test_missing_required_values(self):
# """ Ensures that appropriate errors are generated for missing values
# """
# twine_file = self.path + "twines/valid_schema_twine.json"
# twine = Twine(file=twine_file)
# values_file = self.path + "inputs/missing_required.json"
# with self.assertRaises(exceptions.InvalidValuesContents):
# twine.validate_input_values(file=values_file)
#
# def test_valid_values_files(self):
# """ Ensures that values can be read and validated correctly from files on disk
# """
# twine_file = self.path + "twines/valid_schema_twine.json"
# twine = Twine(file=twine_file)
# twine.validate_configuration(file=self.path + "configurations/valid.json")
# twine.validate_input_values(file=self.path + "inputs/valid.json")
# twine.validate_output_values(file=self.path + "outputs/valid.json")
#
# def test_valid_values_json(self):
# """ Ensures that values can be read and validated correctly from a json string
# """
# twine_file = self.path + "twines/valid_schema_twine.json"
# twine = Twine(file=twine_file)
# values_file = self.path + "configurations/valid.json"
# with open(values_file, "r", encoding="utf-8") as f:
# json_string = f.read()
# twine.validate_configuration(json=json_string)
#
# def test_valid_with_extra_values(self):
# """ Ensures that extra values get ignored
# """
# twine_file = self.path + "twines/valid_schema_twine.json"
# twine = Twine(file=twine_file)
# values_file = self.path + "configurations/valid_with_extra.json"
# twine.validate_configuration(file=values_file)


if __name__ == "__main__":
    # Allow this test module to be run directly as a script, not only through a test runner.
    unittest.main()
16 changes: 15 additions & 1 deletion twined/twine.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ def _validate_against_schema(self, strand, data):
logger.debug("Validated %s against schema", strand)

except ValidationError as e:
raise exceptions.invalid_contents_map[strand](e.message)
raise exceptions.invalid_contents_map[strand](str(e))

def _validate_twine_version(self):
""" Validates that the installed version is consistent with an optional version specification in the twine file
Expand Down Expand Up @@ -244,6 +244,20 @@ def validate_output_values(self, **kwargs):
self._validate_against_schema("output_values", data)
return data

def validate_input_manifest(self, **kwargs):
    """Load the input manifest and validate it against the schema for its strand.

    All keyword arguments are forwarded unchanged to ``self._load_json``; the
    manifest is expected to be supplied as either a file or a json string.

    :return: the loaded manifest data, as returned by ``self._load_json``
    """
    strand = "input_manifest"
    manifest = self._load_json(strand, **kwargs)
    # Validation is performed for its side effect (raising on failure); the loaded data is returned as-is.
    self._validate_against_schema(strand, manifest)
    return manifest

def validate_output_manifest(self, **kwargs):
    """Load the output manifest and validate it against the schema for its strand.

    All keyword arguments are forwarded unchanged to ``self._load_json``; the
    manifest is expected to be supplied as either a file or a json string.

    :return: the loaded manifest data, as returned by ``self._load_json``
    """
    strand = "output_manifest"
    manifest = self._load_json(strand, **kwargs)
    # Validation is performed for its side effect (raising on failure); the loaded data is returned as-is.
    self._validate_against_schema(strand, manifest)
    return manifest

# def validate(
# self,
# configuration=None,
Expand Down

0 comments on commit ac16299

Please sign in to comment.