From 0da1e12145ac334fc4d18aa73c6ed76eb8d2276d Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Fri, 19 Jun 2020 14:31:53 +0100 Subject: [PATCH 01/14] OPS Adding sphinx build to the pre-commit quality control --- .pre-commit-config.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b8da05..7762cda 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,6 +29,11 @@ repos: rev: 3.8.1 hooks: - id: flake8 -# args: ['--config=setup.cfg'] additional_dependencies: [flake8-isort] language_version: python3 + + - repo: https://github.com/thclark/pre-commit-sphinx + rev: 0.0.1 + hooks: + - id: build-docs + language_version: python3 From 6d8d5e6c70891213d3e7ba4dd4575ce4eb7cb4a3 Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Mon, 22 Jun 2020 18:42:15 +0100 Subject: [PATCH 02/14] DEPS consolidated developer requirements --- docs/requirements.txt | 7 ------- requirements-dev.txt | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) delete mode 100644 docs/requirements.txt diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 2e349a1..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,7 +0,0 @@ - -# Required by the python script for building documentation -Sphinx==1.8.3 -sphinx-rtd-theme==0.4.2 -sphinx-tabs==1.1.10 -breathe==4.11.1 -exhale==0.2.1 diff --git a/requirements-dev.txt b/requirements-dev.txt index 109c3be..2f0f26c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -10,3 +10,20 @@ flake8==3.8.3 # https://github.com/PyCQA/flake8 flake8-isort==3.0.0 # https://github.com/gforcada/flake8-isort black==19.10.b0 # https://github.com/ambv/black pre-commit # https://github.com/pre-commit/pre-commit + + +# Pre-deploy checks +# ------------------------------------------------------------------------------ +setuptools +wheel +twine # <---- nothing to do with the twined library! 
+ + +# Building documentation +# ------------------------------------------------------------------------------ +Sphinx==1.8.3 +sphinx-rtd-theme==0.4.2 +sphinx-tabs==1.1.10 +breathe==4.11.1 +exhale==0.2.1 +pre-commit try-repo ../hook-repo foo --verbose --all-files From 75d803194b7a80b8ae185e2276ddc5212868c943 Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Mon, 22 Jun 2020 18:45:55 +0100 Subject: [PATCH 03/14] DEPS dotenv allows us to parse credentials --- setup.cfg | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index b5069ea..e488647 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,3 +1,3 @@ [metadata] long_description = file: README.md -long_description_content_type = text/markdown; charset=UTF-8 \ No newline at end of file +long_description_content_type = text/markdown; charset=UTF-8 diff --git a/setup.py b/setup.py index 670be80..e59ad51 100644 --- a/setup.py +++ b/setup.py @@ -17,9 +17,9 @@ name="twined", version="0.0.9", py_modules=[], - install_requires=["jsonschema ~= 3.2.0"], + install_requires=["jsonschema ~= 3.2.0", "python-dotenv"], url="https://www.github.com/octue/twined", - license='MIT', + license=license_text, author="Octue (github: octue)", description="A library to help digital twins and data services talk to one another", long_description=readme_text, From 175aa3867d7088fd812efdf489fa5d21358e53e0 Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 08:07:35 +0100 Subject: [PATCH 04/14] GIT Ignoring locally generated documents --- docs/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 docs/.gitignore diff --git a/docs/.gitignore b/docs/.gitignore new file mode 100644 index 0000000..10971d6 --- /dev/null +++ b/docs/.gitignore @@ -0,0 +1,2 @@ +doctrees +html From cb2de7bf888758cf130b1a5e2e05b47013df8428 Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 08:09:26 +0100 Subject: [PATCH 05/14] FIX Added missing and unmapped exceptions --- 
twined/exceptions.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/twined/exceptions.py b/twined/exceptions.py index 184bde7..55aee95 100644 --- a/twined/exceptions.py +++ b/twined/exceptions.py @@ -47,6 +47,11 @@ class FolderNotFound(TwineException): """ +class CredentialNotFound(TwineException): + """ Raised when a credential specified in the twine file is not present in either the environment or a .env file + """ + + class TwineFileNotFound(TwineException, FileNotFoundError): """ Raised when the specified twine file is not present """ @@ -140,6 +145,7 @@ class UnexpectedNumberOfResults(TwineException): # TODO Specialised per-strand exceptions to help drill to the root of the issues invalid_json_map = { "twine": InvalidTwineJson, + "children": InvalidValuesJson, "configuration": InvalidValuesJson, "input_values": InvalidValuesJson, "output_values": InvalidValuesJson, @@ -150,6 +156,7 @@ class UnexpectedNumberOfResults(TwineException): # TODO Specialised per-strand exceptions to help drill to the root of the issues invalid_contents_map = { "twine": InvalidTwineContents, + "children": InvalidValuesContents, "configuration": InvalidValuesContents, "input_values": InvalidValuesContents, "output_values": InvalidValuesContents, From 7f7e1eab6523a25a7b7058f76b11155740076fbf Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 08:10:15 +0100 Subject: [PATCH 06/14] TST Added fixture data for testing credentials and children validation --- tests/data/children/extra_key.json | 13 +++++++++++++ tests/data/children/extra_property.json | 8 ++++++++ tests/data/children/invalid_env_name.json | 7 +++++++ tests/data/children/valid.json | 7 +++++++ tests/data/twines/valid_credentials_twine.json | 17 +++++++++++++++++ .../data/twines/valid_empty_children_twine.json | 4 ++++ 6 files changed, 56 insertions(+) create mode 100644 tests/data/children/extra_key.json create mode 100644 tests/data/children/extra_property.json create mode 100644 
tests/data/children/invalid_env_name.json create mode 100644 tests/data/children/valid.json create mode 100644 tests/data/twines/valid_credentials_twine.json create mode 100644 tests/data/twines/valid_empty_children_twine.json diff --git a/tests/data/children/extra_key.json b/tests/data/children/extra_key.json new file mode 100644 index 0000000..80163d7 --- /dev/null +++ b/tests/data/children/extra_key.json @@ -0,0 +1,13 @@ +[ + { + "key": "gis", + "id": "some-id", + "uri_env_name": "SOME_ENV_VAR_NAME", + "some_extra_key": "should not be a problem if present" + }, + { + "key": "some_weird_other_child", + "id": "some-other-id", + "uri_env_name": "SOME_ENV_VAR_NAME" + } +] \ No newline at end of file diff --git a/tests/data/children/extra_property.json b/tests/data/children/extra_property.json new file mode 100644 index 0000000..81e64a6 --- /dev/null +++ b/tests/data/children/extra_property.json @@ -0,0 +1,8 @@ +[ + { + "key": "gis", + "id": "some-id", + "uri_env_name": "SOME_ENV_VAR_NAME", + "some_extra_property": "should not be a problem if present" + } +] \ No newline at end of file diff --git a/tests/data/children/invalid_env_name.json b/tests/data/children/invalid_env_name.json new file mode 100644 index 0000000..6c880e1 --- /dev/null +++ b/tests/data/children/invalid_env_name.json @@ -0,0 +1,7 @@ +[ + { + "key": "gis", + "id": "some-id", + "uri_env_name": "an environment variable which isnt in CAPS_CASE is invalid per the credentials spec" + } +] \ No newline at end of file diff --git a/tests/data/children/valid.json b/tests/data/children/valid.json new file mode 100644 index 0000000..f2e787e --- /dev/null +++ b/tests/data/children/valid.json @@ -0,0 +1,7 @@ +[ + { + "key": "gis", + "id": "some-id", + "uri_env_name": "NAME_OF_SOME_ENV_VAR_THAT_CONTAINS_A_URI" + } +] \ No newline at end of file diff --git a/tests/data/twines/valid_credentials_twine.json b/tests/data/twines/valid_credentials_twine.json new file mode 100644 index 0000000..3d49808 --- /dev/null +++ 
b/tests/data/twines/valid_credentials_twine.json @@ -0,0 +1,17 @@ +{ + "credentials": [ + { + "name": "SECRET_THE_FIRST", + "purpose": "Token for accessing a 3rd party API service" + }, + { + "name": "SECRET_THE_SECOND", + "purpose": "Token for accessing a 3rd party API service" + }, + { + "name": "SECRET_THE_THIRD", + "purpose": "Usually a big secret but sometimes has a convenient non-secret default, like a sandbox or local database", + "default": "postgres://pguser:pgpassword@localhost:5432/pgdb" + } + ] +} diff --git a/tests/data/twines/valid_empty_children_twine.json b/tests/data/twines/valid_empty_children_twine.json new file mode 100644 index 0000000..266797e --- /dev/null +++ b/tests/data/twines/valid_empty_children_twine.json @@ -0,0 +1,4 @@ +{ + "children": [ + ] +} From fafad0e96d97695729803a0d3bf401d45dc17440 Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 08:10:46 +0100 Subject: [PATCH 07/14] IMP Added validation for children and credentials --- tests/test_children.py | 89 +++++++++++++++++++++++--------- tests/test_credentials.py | 61 +++++++++++++++------- twined/twine.py | 105 ++++++++++++++++++++++++++++++++++++-- 3 files changed, 209 insertions(+), 46 deletions(-) diff --git a/tests/test_children.py b/tests/test_children.py index eea79da..e27552d 100644 --- a/tests/test_children.py +++ b/tests/test_children.py @@ -33,30 +33,73 @@ def test_valid_children(self): twine = Twine(file=twine_file) self.assertEqual(len(twine._raw["children"]), 1) + def test_empty_children(self): + """ Ensures that a twine file will validate with an empty list object as children + """ + twine_file = self.path + "twines/valid_empty_children_twine.json" + twine = Twine(file=twine_file) + self.assertEqual(len(twine._raw["children"]), 0) + + +class TestChildrenValidation(BaseTestCase): + """ Tests related to whether validation of children occurs successfully (given a valid twine) + """ + + def test_no_children(self): + """ Test that a twine with no children will 
validate on an empty children input + """ + twine = Twine() # Creates empty twine + twine.validate_children(json="[]") + + def test_missing_children(self): + """ Test that a twine with children will not validate on an empty children input + """ + twine = Twine(file=self.path + "twines/valid_children_twine.json") + with self.assertRaises(exceptions.InvalidValuesContents): + twine.validate_children(json="[]") + + def test_extra_children(self): + """ Test that a twine with no children will not validate a non-empty children input + """ + twine = Twine() # Creates empty twine + with self.assertRaises(exceptions.InvalidValuesContents): + twine.validate_children(file=self.path + "children/valid.json") -# class TestChildrenValidation(unittest.TestCase): -# """ Tests related to whether validation of children occurs successfully (given a valid twine) -# """ -# -# def test_no_children(self): -# """ Test that a twine with no children will validate on an empty children input -# """ -# raise exceptions.NotImplementedYet() -# -# def test_missing_children(self): -# """ Test that a twine with children will not validate on an empty children input -# """ -# raise exceptions.NotImplementedYet() -# -# def test_extra_children(self): -# """ Test that a twine with no children will not validate a non-empty children input -# """ -# raise exceptions.NotImplementedYet() -# -# def test_matched_children(self): -# """ Test that a twine with children required will validate when the children input matches -# """ -# raise exceptions.NotImplementedYet() + def test_extra_key(self): + """ Test that children with keys not described in the twine's children strand will not validate + """ + twine = Twine() # Creates empty twine + with self.assertRaises(exceptions.InvalidValuesContents): + twine.validate_children(file=self.path + "children/extra_key.json") + + def test_extra_property(self): + """ Test that children with extra data will not raise validation error + # TODO review this behaviour - possibly should raise an error but 
allow for a user specified extra_data property + """ + twine = Twine(file=self.path + "twines/valid_children_twine.json") + twine.validate_children(file=self.path + "children/extra_property.json") + + def test_invalid_env_name(self): + """ Test that a child uri env name not in ALL_CAPS_SNAKE_CASE doesn't validate + """ + twine = Twine() # Creates empty twine + with self.assertRaises(exceptions.InvalidValuesContents): + twine.validate_children(file=self.path + "children/invalid_env_name.json") + + def test_invalid_json(self): + """ Tests that a children entry with invalid json will raise an error + """ + twine = Twine(file=self.path + "twines/valid_children_twine.json") + with self.assertRaises(exceptions.InvalidValuesJson): + twine.validate_children(json="[") + + def test_valid(self): + """ Test that a valid twine will validate valid children + Valiantly and Validly validating validity since 1983. + To those reading this, know that YOU'RE valid. + """ + twine = Twine(file=self.path + "twines/valid_children_twine.json") + twine.validate_children(file=self.path + "children/valid.json") if __name__ == "__main__": diff --git a/tests/test_credentials.py b/tests/test_credentials.py index dbb1de5..d72e37f 100644 --- a/tests/test_credentials.py +++ b/tests/test_credentials.py @@ -1,4 +1,6 @@ +import os import unittest +from unittest import mock from twined import Twine, exceptions @@ -40,25 +42,46 @@ def test_fails_on_name_whitespace(self): Twine(file=twine_file) -# class TestCredentialsValidation(unittest.TestCase): -# """ Tests related to whether validation of children occurs successfully (given a valid twine) -# """ -# -# def test_no_credentials(self): -# """ Test that a twine with no credentials will validate straightforwardly -# """ -# raise exceptions.NotImplementedYet() -# -# def test_missing_credentials(self): -# """ Test that a twine with credentials will not validate where they are missing from the environment -# """ -# raise exceptions.NotImplementedYet() -# -# 
def test_matched_credentials(self): -# """ Test that a twine with credentials required will validate when the credentials are available in the -# environment -# """ -# raise exceptions.NotImplementedYet() +class TestCredentialsValidation(BaseTestCase): + """ Tests related to whether validation of credentials occurs successfully (given a valid twine) + """ + + def test_no_credentials(self): + """ Test that a twine with no credentials will validate straightforwardly + """ + twine = Twine(file=self.path + "twines/valid_schema_twine.json") + twine.validate_credentials() + + def test_missing_credentials(self): + """ Test that a twine with credentials will not validate where they are missing from the environment + """ + twine = Twine(file=self.path + "twines/valid_credentials_twine.json") + with self.assertRaises(exceptions.CredentialNotFound): + twine.validate_credentials() + + def test_default_credentials(self): + """ Test that a twine with credentials will validate where ones with defaults are missing from the environment + """ + twine = Twine(file=self.path + "twines/valid_credentials_twine.json") + with mock.patch.dict(os.environ, {"SECRET_THE_FIRST": "a value", "SECRET_THE_SECOND": "another value"}): + credentials = twine.validate_credentials() + + self.assertIn("SECRET_THE_FIRST", credentials.keys()) + self.assertIn("SECRET_THE_SECOND", credentials.keys()) + self.assertIn("SECRET_THE_THIRD", credentials.keys()) + self.assertEqual(credentials["SECRET_THE_THIRD"], "postgres://pguser:pgpassword@localhost:5432/pgdb") + + def test_nondefault_credentials(self): + """ Test that the environment will override a default value for a credential + """ + twine = Twine(file=self.path + "twines/valid_credentials_twine.json") + with mock.patch.dict( + os.environ, + {"SECRET_THE_FIRST": "a value", "SECRET_THE_SECOND": "another value", "SECRET_THE_THIRD": "nondefault"}, + ): + credentials = twine.validate_credentials() + + self.assertEqual(credentials["SECRET_THE_THIRD"], "nondefault") 
if __name__ == "__main__": diff --git a/twined/twine.py b/twined/twine.py index 7e387fd..1de8aae 100644 --- a/twined/twine.py +++ b/twined/twine.py @@ -1,10 +1,13 @@ import json as jsonlib import logging +import os import pkg_resources from jsonschema import ValidationError from jsonschema import validate as jsonschema_validate +from dotenv import load_dotenv + from . import exceptions logger = logging.getLogger(__name__) @@ -93,16 +96,30 @@ def _validate_against_schema(self, strand, data): """ Validates data against a schema, raises exceptions of type InvalidJson if not compliant. Can be used to validate: - - values data for compliance with schema (for schema based strands) or - the twine file contents itself against the present version twine spec + - children data against the required schema for the present version twine spec + - values data for compliance with schema written in the twine (for strands like input_values_schema) """ if strand == "twine": - # A twine *contains* schema, but we also need to verify that it matches a certain schema itself - # The twine schema is distributed with this packaged to ensure version consistency... + # The data is a twine. A twine *contains* schema, but we also need to verify that it matches a certain + # schema itself. The twine schema is distributed with this package to ensure version consistency... schema = jsonlib.loads(pkg_resources.resource_string("twined", "schema/twine_schema.json")) + + elif strand in CHILDREN_STRANDS: + # The data is a list of children. The "children" strand of the twine describes matching criteria for + # the children, not the schema of the "children" data, which is distributed with this package to ensure + # version consistency... + schema = jsonlib.loads(pkg_resources.resource_string("twined", "schema/children_schema.json")) + + elif strand in MANIFEST_STRANDS: + # The data is a manifest of files. 
The "*_manifest" strands of the twine describe matching criteria used to + # filter files appropriate for consumption by the digital twin, not the schema of the manifest data, which + # is distributed with this package to ensure version consistency... + schema = jsonlib.loads(pkg_resources.resource_string("twined", "schema/manifest_schema.json")) + else: if strand not in SCHEMA_STRANDS: - raise exceptions.TwineTypeException(f"Unknown strand {strand}. Try one of {SCHEMA_STRANDS}.") + raise exceptions.TwineTypeException(f"Unknown strand {strand}. Try one of {ALL_STRANDS}.") schema_key = strand + "_schema" schema = self._raw[schema_key] @@ -126,6 +143,86 @@ def _validate_twine_version(self): f"Twined library version conflict. Twine file requires {twine_file_twined_version} but you have {installed_twined_version} installed" ) + def validate_children(self, **kwargs): + """ Validates that the children values, passed as either a file or a json string, are correct + """ + # TODO cache this loaded data keyed on a hashed version of kwargs + children = self._load_json("children", **kwargs) + self._validate_against_schema("children", children) + + strand = self._raw.get("children", []) + + # Loop the children and accumulate values so we have an O(1) check + children_keys = {} + for child in children: + children_keys[child["key"]] = children_keys.get(child["key"], 0) + 1 + + # Check there is at least one child for each item described in the strand + # TODO add max, min num specs to the strand schema and check here + for item in strand: + strand_key = item["key"] + if children_keys.get(strand_key, 0) <= 0: + raise exceptions.InvalidValuesContents(f"No children found matching the key {strand_key}") + + # Loop the strand and add unique keys to dict so we have an O(1) check + strand_keys = {} + for item in strand: + strand_keys[item["key"]] = True + + # Check that each child has a key which is described in the strand + for child in children: + child_key = child["key"] + if not 
strand_keys.get(child_key, False): + raise exceptions.InvalidValuesContents( + f"Child with key '{child_key}' found but no such key exists in the 'children' strand of the twine." + ) + + # TODO Additional validation that the children match what is set as required in the Twine + return children + + def validate_credentials(self, dotenv_path=None): + """ Validates that all credentials required by the twine are present + + Credentials may either be set as environment variables or defined in a '.env' file. If not present in the + environment, validate_credentials will check for variables in a .env file (if present) and populate the + environment with them. If not present in either the environment or the .env file, default values are used + (if defined) or an error is thrown. + + Typically a .env file resides at the root of your application (the working directory) although a specific path + may be set using the `dotenv_path` argument. + + .env files should never be committed to git or any other version control system. + + A .env file can look like this: + ``` + # a comment that will be ignored. 
+ YOUR_SECRET_VALUE=itsasecret + MEANING_OF_LIFE=42 + MULTILINE_VAR="hello\nworld" + ``` + Or like this (also useful for bash users): + ``` + export YOUR_SECRET_VALUE=itsasecret + export MEANING_OF_LIFE=42 + export MULTILINE_VAR="hello\nworld" + ``` + """ + + # Load any variables from the .env file into the environment + dotenv_path = dotenv_path or os.path.join(".", ".env") + load_dotenv(dotenv_path) + + # Loop through the required credentials to check for presence of each + credentials = {} + for credential in self._raw.get("credentials", []): + name = credential["name"] + default = credential.get("default", None) + credentials[name] = os.environ.get(name, default) + if credentials[name] is None: + raise exceptions.CredentialNotFound(f"Credential '{name}' missing from environment or .env file") + + return credentials + def validate_configuration(self, **kwargs): """ Validates that the configuration values, passed as either a file or a json string, are correct """ From 440571efea58cc9d0d15e50fd2c34b340002df1d Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 08:11:41 +0100 Subject: [PATCH 08/14] OPS Version bump --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e59ad51..264e17d 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ setup( name="twined", - version="0.0.9", + version="0.0.10", py_modules=[], install_requires=["jsonschema ~= 3.2.0", "python-dotenv"], url="https://www.github.com/octue/twined", From ee25801c23d8bfb558f4916538d3da5bdc964df2 Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 08:41:13 +0100 Subject: [PATCH 09/14] DOC Updated credentials and input values quickstart examples --- docs/source/quick_start.rst | 26 ++++++++++++++++++++++++-- docs/source/schema.rst | 11 +++++++++-- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/docs/source/quick_start.rst b/docs/source/quick_start.rst index 546ddaa..9d89d1c 100644 --- a/docs/source/quick_start.rst 
+++ b/docs/source/quick_start.rst @@ -77,7 +77,7 @@ Load the twine ============== **twined** provides a `Twine()` class to load a twine (from a file or a json string). -The loading process checks the twine is valid. It's as simple as: +The loading process checks the twine itself is valid. It's as simple as: .. code-block:: py @@ -91,5 +91,27 @@ The loading process checks the twine is valid. It's as simple as: Validate some inputs ==================== +Say we have some json that we want to parse and validate, to make sure it matches what's required for input values. + +.. code-block:: py + + my_input_values = my_twine.validate_input_values(json='{"foo": 30, "baz": 500}') + +You can read the values from a file too. Paste the following into a file named ``input_values.json``: + +.. code-block:: javascript + + { + "foo": 30, + "baz": 500 + } + +Then parse and validate directly from the file: + +.. code-block:: py + + my_input_values = my_twine.validate_input_values(file="input_values.json") + + .. ATTENTION:: - LIBRARY IS UNDER CONSTRUCTION! WATCH THIS SPACE! \ No newline at end of file + LIBRARY IS UNDER CONSTRUCTION! WATCH THIS SPACE FOR MORE! \ No newline at end of file diff --git a/docs/source/schema.rst b/docs/source/schema.rst index f821595..98af867 100644 --- a/docs/source/schema.rst +++ b/docs/source/schema.rst @@ -278,8 +278,15 @@ Here, we describe how each of these data classes is described by **twined**. SERVICE_API_KEY=someLongTokenTHatYouProbablyHaveToPayTheThirdPartyProviderLoadsOfMoneyFor - **twined** helps by providing a small shim to check for their presence and bring these environment variables - into your configuration. + Credentials may also reside in a ``.env`` file in the current directory, either in the format above + (with a new line for each variable) or, for convenience, as bash exports like: + + .. 
code-block:: javascript + + export SERVICE_API_KEY=someLongTokenTHatYouProbablyHaveToPayTheThirdPartyProviderLoadsOfMoneyFor + + The ``validate_credentials()`` method of the ``Twine`` class checks for their presence and, where contained in a + ``.env`` file, ensures they are loaded into the environment. .. ATTENTION:: From 81876ce6be005fd8d8a534781b8e595f054bf290 Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 08:42:57 +0100 Subject: [PATCH 10/14] STY End of files --- tests/data/children/extra_key.json | 2 +- tests/data/children/extra_property.json | 2 +- tests/data/children/invalid_env_name.json | 2 +- tests/data/children/valid.json | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/data/children/extra_key.json b/tests/data/children/extra_key.json index 80163d7..1a14448 100644 --- a/tests/data/children/extra_key.json +++ b/tests/data/children/extra_key.json @@ -10,4 +10,4 @@ "id": "some-other-id", "uri_env_name": "SOME_ENV_VAR_NAME" } -] \ No newline at end of file +] diff --git a/tests/data/children/extra_property.json b/tests/data/children/extra_property.json index 81e64a6..83afc66 100644 --- a/tests/data/children/extra_property.json +++ b/tests/data/children/extra_property.json @@ -5,4 +5,4 @@ "uri_env_name": "SOME_ENV_VAR_NAME", "some_extra_property": "should not be a problem if present" } -] \ No newline at end of file +] diff --git a/tests/data/children/invalid_env_name.json b/tests/data/children/invalid_env_name.json index 6c880e1..69391f9 100644 --- a/tests/data/children/invalid_env_name.json +++ b/tests/data/children/invalid_env_name.json @@ -4,4 +4,4 @@ "id": "some-id", "uri_env_name": "an environment variable which isnt in CAPS_CASE is invalid per the credentials spec" } -] \ No newline at end of file +] diff --git a/tests/data/children/valid.json b/tests/data/children/valid.json index f2e787e..37fed88 100644 --- a/tests/data/children/valid.json +++ b/tests/data/children/valid.json @@ -4,4 +4,4 @@ "id": "some-id", 
"uri_env_name": "NAME_OF_SOME_ENV_VAR_THAT_CONTAINS_A_URI" } -] \ No newline at end of file +] From 76ccb3f43719368deee288aeef6eb308015c5c2f Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 10:07:00 +0100 Subject: [PATCH 11/14] REF Sorted the fixture files more sensibly --- .../configurations/configuration_empty.json} | 0 .../configuration_incorrect.json} | 0 .../configurations/configuration_valid.json} | 0 .../configuration_valid_with_extra.json} | 0 .../inputs/input_missing_required.json} | 0 .../inputs/input_valid.json} | 0 .../outputs/output_missing_not_required.json} | 0 .../outputs/output_valid.json} | 0 tests/test_schema_strands.py | 20 +++++++++---------- 9 files changed, 10 insertions(+), 10 deletions(-) rename tests/data/{configurations/empty.json => values/configurations/configuration_empty.json} (100%) rename tests/data/{configurations/incorrect.json => values/configurations/configuration_incorrect.json} (100%) rename tests/data/{configurations/valid.json => values/configurations/configuration_valid.json} (100%) rename tests/data/{configurations/valid_with_extra.json => values/configurations/configuration_valid_with_extra.json} (100%) rename tests/data/{inputs/missing_required.json => values/inputs/input_missing_required.json} (100%) rename tests/data/{inputs/valid.json => values/inputs/input_valid.json} (100%) rename tests/data/{outputs/missing_not_required.json => values/outputs/output_missing_not_required.json} (100%) rename tests/data/{outputs/valid.json => values/outputs/output_valid.json} (100%) diff --git a/tests/data/configurations/empty.json b/tests/data/values/configurations/configuration_empty.json similarity index 100% rename from tests/data/configurations/empty.json rename to tests/data/values/configurations/configuration_empty.json diff --git a/tests/data/configurations/incorrect.json b/tests/data/values/configurations/configuration_incorrect.json similarity index 100% rename from tests/data/configurations/incorrect.json 
rename to tests/data/values/configurations/configuration_incorrect.json diff --git a/tests/data/configurations/valid.json b/tests/data/values/configurations/configuration_valid.json similarity index 100% rename from tests/data/configurations/valid.json rename to tests/data/values/configurations/configuration_valid.json diff --git a/tests/data/configurations/valid_with_extra.json b/tests/data/values/configurations/configuration_valid_with_extra.json similarity index 100% rename from tests/data/configurations/valid_with_extra.json rename to tests/data/values/configurations/configuration_valid_with_extra.json diff --git a/tests/data/inputs/missing_required.json b/tests/data/values/inputs/input_missing_required.json similarity index 100% rename from tests/data/inputs/missing_required.json rename to tests/data/values/inputs/input_missing_required.json diff --git a/tests/data/inputs/valid.json b/tests/data/values/inputs/input_valid.json similarity index 100% rename from tests/data/inputs/valid.json rename to tests/data/values/inputs/input_valid.json diff --git a/tests/data/outputs/missing_not_required.json b/tests/data/values/outputs/output_missing_not_required.json similarity index 100% rename from tests/data/outputs/missing_not_required.json rename to tests/data/values/outputs/output_missing_not_required.json diff --git a/tests/data/outputs/valid.json b/tests/data/values/outputs/output_valid.json similarity index 100% rename from tests/data/outputs/valid.json rename to tests/data/values/outputs/output_valid.json diff --git a/tests/test_schema_strands.py b/tests/test_schema_strands.py index f40281f..86b9ac6 100644 --- a/tests/test_schema_strands.py +++ b/tests/test_schema_strands.py @@ -25,7 +25,7 @@ def test_invalid_strand(self): """ twine_file = self.path + "twines/valid_schema_twine.json" twine = Twine(file=twine_file) - values_file = self.path + "configurations/valid.json" + values_file = self.path + "values/configurations/configuration_valid.json" data = 
twine._load_json("configuration", file=values_file) with self.assertRaises(exceptions.TwineTypeException): twine._validate_against_schema("not_a_strand_name", data) @@ -50,7 +50,7 @@ def test_empty_values(self): """ twine_file = self.path + "twines/valid_schema_twine.json" twine = Twine(file=twine_file) - values_file = self.path + "configurations/empty.json" + values_file = self.path + "values/configurations/configuration_empty.json" with self.assertRaises(exceptions.InvalidValuesJson): twine.validate_configuration(file=values_file) @@ -59,7 +59,7 @@ def test_incorrect_values(self): """ twine_file = self.path + "twines/valid_schema_twine.json" twine = Twine(file=twine_file) - values_file = self.path + "configurations/incorrect.json" + values_file = self.path + "values/configurations/configuration_incorrect.json" with self.assertRaises(exceptions.InvalidValuesContents): twine.validate_configuration(file=values_file) @@ -68,7 +68,7 @@ def test_missing_not_required_values(self): """ twine_file = self.path + "twines/valid_schema_twine.json" twine = Twine(file=twine_file) - values_file = self.path + "outputs/missing_not_required.json" + values_file = self.path + "values/outputs/output_missing_not_required.json" twine.validate_output_values(file=values_file) def test_missing_required_values(self): @@ -76,7 +76,7 @@ def test_missing_required_values(self): """ twine_file = self.path + "twines/valid_schema_twine.json" twine = Twine(file=twine_file) - values_file = self.path + "inputs/missing_required.json" + values_file = self.path + "values/inputs/input_missing_required.json" with self.assertRaises(exceptions.InvalidValuesContents): twine.validate_input_values(file=values_file) @@ -85,16 +85,16 @@ def test_valid_values_files(self): """ twine_file = self.path + "twines/valid_schema_twine.json" twine = Twine(file=twine_file) - twine.validate_configuration(file=self.path + "configurations/valid.json") - twine.validate_input_values(file=self.path + "inputs/valid.json") - 
twine.validate_output_values(file=self.path + "outputs/valid.json") + twine.validate_configuration(file=self.path + "values/configurations/configuration_valid.json") + twine.validate_input_values(file=self.path + "values/inputs/input_valid.json") + twine.validate_output_values(file=self.path + "values/outputs/output_valid.json") def test_valid_values_json(self): """ Ensures that values can be read and validated correctly from a json string """ twine_file = self.path + "twines/valid_schema_twine.json" twine = Twine(file=twine_file) - values_file = self.path + "configurations/valid.json" + values_file = self.path + "values/configurations/configuration_valid.json" with open(values_file, "r", encoding="utf-8") as f: json_string = f.read() twine.validate_configuration(json=json_string) @@ -104,7 +104,7 @@ def test_valid_with_extra_values(self): """ twine_file = self.path + "twines/valid_schema_twine.json" twine = Twine(file=twine_file) - values_file = self.path + "configurations/valid_with_extra.json" + values_file = self.path + "values/configurations/configuration_valid_with_extra.json" twine.validate_configuration(file=values_file) From 43b11848f60a56bd94aeacf277c4d84b29e38c37 Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 10:07:14 +0100 Subject: [PATCH 12/14] FIX Added missing manifest not found exceptions to the map --- twined/exceptions.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/twined/exceptions.py b/twined/exceptions.py index 55aee95..743d3df 100644 --- a/twined/exceptions.py +++ b/twined/exceptions.py @@ -140,6 +140,8 @@ class UnexpectedNumberOfResults(TwineException): "configuration": ConfigFileNotFound, "input_values": InputValuesFileNotFound, "output_values": OutputValuesFileNotFound, + "input_manifest": InputManifestFileNotFound, + "output_manifest": OutputManifestFileNotFound, } # TODO Specialised per-strand exceptions to help drill to the root of the issues From 74f867e83ba58be82e1d516d581bd3db2f286df0 Mon Sep 17 00:00:00 2001 
From: Tom Clark Date: Tue, 23 Jun 2020 10:11:45 +0100 Subject: [PATCH 13/14] IMP Altered manifest schema to default the kind --- twined/schema/manifest_schema.json | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/twined/schema/manifest_schema.json b/twined/schema/manifest_schema.json index 53eb686..e4a98eb 100644 --- a/twined/schema/manifest_schema.json +++ b/twined/schema/manifest_schema.json @@ -2,8 +2,18 @@ "type": "object", "properties": { "kind": { - "description": "The kind of the manifest, eg 'multidataset'", - "type": "string" + "description": "The kind of the manifest, (only 'multi-dataset' available, reserved for future use)", + "type": "string", + "default": "multi-dataset", + "anyOf": [ + { + "type": "string", + "enum": [ + "multi-dataset" + ], + "title": "Multi Dataset" + } + ] }, "id": { "description": "ID of the manifest, typically a uuid", @@ -76,5 +86,5 @@ } } }, - "required": ["id", "kind", "datasets"] + "required": ["id", "datasets"] } From ac16299d82a7c037f75656bec41a089bd68785ba Mon Sep 17 00:00:00 2001 From: Tom Clark Date: Tue, 23 Jun 2020 10:12:10 +0100 Subject: [PATCH 14/14] IMP Added basic validation and testing for manifests --- tests/data/manifests/inputs/input_valid.json | 38 ++++++ .../data/manifests/outputs/output_valid.json | 38 ++++++ tests/data/twines/valid_manifest_twine.json | 14 +++ tests/test_manifest_strands.py | 110 ++++++++++++++++++ twined/twine.py | 16 ++- 5 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 tests/data/manifests/inputs/input_valid.json create mode 100644 tests/data/manifests/outputs/output_valid.json create mode 100644 tests/data/twines/valid_manifest_twine.json create mode 100644 tests/test_manifest_strands.py diff --git a/tests/data/manifests/inputs/input_valid.json b/tests/data/manifests/inputs/input_valid.json new file mode 100644 index 0000000..3c75b4e --- /dev/null +++ b/tests/data/manifests/inputs/input_valid.json @@ -0,0 +1,38 @@ +{ + "id": 
"8ead7669-8162-4f64-8cd5-4abe92509e17", + "datasets": [ + { + "id": "7ead7669-8162-4f64-8cd5-4abe92509e17", + "name": "my meteorological dataset", + "tags": "met, mast, wind", + "files": [ + { + "path": "input/datasets/7ead7669/file_1.csv", + "cluster": 0, + "sequence": 0, + "extension": "csv", + "tags": "", + "posix_timestamp": 0, + "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86", + "last_modified": "2019-02-28T22:40:30.533005Z", + "name": "file_1.csv", + "size_bytes": 59684813, + "sha-512/256": "somesha" + }, + { + "path": "input/datasets/7ead7669/file_2.csv", + "cluster": 0, + "sequence": 1, + "extension": "csv", + "tags": "", + "posix_timestamp": 0, + "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45", + "last_modified": "2019-02-28T22:40:40.633001Z", + "name": "file_2.csv", + "size_bytes": 59684813, + "sha-512/256": "someothersha" + } + ] + } + ] +} diff --git a/tests/data/manifests/outputs/output_valid.json b/tests/data/manifests/outputs/output_valid.json new file mode 100644 index 0000000..3c75b4e --- /dev/null +++ b/tests/data/manifests/outputs/output_valid.json @@ -0,0 +1,38 @@ +{ + "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", + "datasets": [ + { + "id": "7ead7669-8162-4f64-8cd5-4abe92509e17", + "name": "my meteorological dataset", + "tags": "met, mast, wind", + "files": [ + { + "path": "input/datasets/7ead7669/file_1.csv", + "cluster": 0, + "sequence": 0, + "extension": "csv", + "tags": "", + "posix_timestamp": 0, + "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86", + "last_modified": "2019-02-28T22:40:30.533005Z", + "name": "file_1.csv", + "size_bytes": 59684813, + "sha-512/256": "somesha" + }, + { + "path": "input/datasets/7ead7669/file_2.csv", + "cluster": 0, + "sequence": 1, + "extension": "csv", + "tags": "", + "posix_timestamp": 0, + "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45", + "last_modified": "2019-02-28T22:40:40.633001Z", + "name": "file_2.csv", + "size_bytes": 59684813, + "sha-512/256": "someothersha" + } + ] + } + ] +} diff --git 
a/tests/data/twines/valid_manifest_twine.json b/tests/data/twines/valid_manifest_twine.json new file mode 100644 index 0000000..5b939e1 --- /dev/null +++ b/tests/data/twines/valid_manifest_twine.json @@ -0,0 +1,14 @@ +{ + "input_manifest": [ + { + "key": "met_mast_data", + "purpose": "A dataset containing meteorological mast data", + "filters": "tags:(met* AND mast AND location) files:(extension:csv AND sequence:>=0) location:10" + }, + { + "key": "scada_data", + "purpose": "A dataset containing scada data", + "filters": "tags:(met* AND mast) files:(extension:csv AND sequence:>=0) location:10" + } + ] +} diff --git a/tests/test_manifest_strands.py b/tests/test_manifest_strands.py new file mode 100644 index 0000000..0556388 --- /dev/null +++ b/tests/test_manifest_strands.py @@ -0,0 +1,110 @@ +import unittest + +from twined import Twine, exceptions + +from .base import BaseTestCase + + +class TestManifestStrands(BaseTestCase): + """ Testing operation of the Twine class for validation of data using strands which require manifests + """ + + def test_cannot_load_with_no_file_or_json(self): + """ Ensures the correct exception is thrown when manifest is unspecified + """ + + twine_file = self.path + "twines/valid_manifest_twine.json" + twine = Twine(file=twine_file) + with self.assertRaises(exceptions.TwineTypeException): + twine.validate_input_manifest() + + with self.assertRaises(exceptions.TwineTypeException): + twine.validate_output_manifest() + + def test_missing_manifest_files(self): + """ Ensures that if you try to read values from missing files, the right exceptions get raised + """ + twine_file = self.path + "twines/valid_manifest_twine.json" + twine = Twine(file=twine_file) + file = self.path + "not_a_file.json" + with self.assertRaises(exceptions.InputManifestFileNotFound): + twine.validate_input_manifest(file=file) + + with self.assertRaises(exceptions.OutputManifestFileNotFound): + twine.validate_output_manifest(file=file) + + def 
test_valid_manifest_files(self): + """ Ensures that a manifest file will validate + """ + twine_file = self.path + "twines/valid_manifest_twine.json" + twine = Twine(file=twine_file) + file = self.path + "manifests/inputs/input_valid.json" + twine.validate_input_manifest(file=file) + file = self.path + "manifests/outputs/output_valid.json" + twine.validate_output_manifest(file=file) + + # def test_empty_values(self): + # """ Ensures that appropriate errors are generated for invalid values + # """ + # twine_file = self.path + "twines/valid_schema_twine.json" + # twine = Twine(file=twine_file) + # values_file = self.path + "configurations/empty.json" + # with self.assertRaises(exceptions.InvalidValuesJson): + # twine.validate_configuration(file=values_file) + # + # def test_incorrect_values(self): + # """ Ensures that appropriate errors are generated for invalid values + # """ + # twine_file = self.path + "twines/valid_schema_twine.json" + # twine = Twine(file=twine_file) + # values_file = self.path + "configurations/incorrect.json" + # with self.assertRaises(exceptions.InvalidValuesContents): + # twine.validate_configuration(file=values_file) + # + # def test_missing_not_required_values(self): + # """ Ensures that appropriate errors are generated for missing values + # """ + # twine_file = self.path + "twines/valid_schema_twine.json" + # twine = Twine(file=twine_file) + # values_file = self.path + "outputs/missing_not_required.json" + # twine.validate_output_values(file=values_file) + # + # def test_missing_required_values(self): + # """ Ensures that appropriate errors are generated for missing values + # """ + # twine_file = self.path + "twines/valid_schema_twine.json" + # twine = Twine(file=twine_file) + # values_file = self.path + "inputs/missing_required.json" + # with self.assertRaises(exceptions.InvalidValuesContents): + # twine.validate_input_values(file=values_file) + # + # def test_valid_values_files(self): + # """ Ensures that values can be read and 
validated correctly from files on disk + # """ + # twine_file = self.path + "twines/valid_schema_twine.json" + # twine = Twine(file=twine_file) + # twine.validate_configuration(file=self.path + "configurations/valid.json") + # twine.validate_input_values(file=self.path + "inputs/valid.json") + # twine.validate_output_values(file=self.path + "outputs/valid.json") + # + # def test_valid_values_json(self): + # """ Ensures that values can be read and validated correctly from a json string + # """ + # twine_file = self.path + "twines/valid_schema_twine.json" + # twine = Twine(file=twine_file) + # values_file = self.path + "configurations/valid.json" + # with open(values_file, "r", encoding="utf-8") as f: + # json_string = f.read() + # twine.validate_configuration(json=json_string) + # + # def test_valid_with_extra_values(self): + # """ Ensures that extra values get ignored + # """ + # twine_file = self.path + "twines/valid_schema_twine.json" + # twine = Twine(file=twine_file) + # values_file = self.path + "configurations/valid_with_extra.json" + # twine.validate_configuration(file=values_file) + + +if __name__ == "__main__": + unittest.main() diff --git a/twined/twine.py b/twined/twine.py index 1de8aae..1a2dfde 100644 --- a/twined/twine.py +++ b/twined/twine.py @@ -128,7 +128,7 @@ def _validate_against_schema(self, strand, data): logger.debug("Validated %s against schema", strand) except ValidationError as e: - raise exceptions.invalid_contents_map[strand](e.message) + raise exceptions.invalid_contents_map[strand](str(e)) def _validate_twine_version(self): """ Validates that the installed version is consistent with an optional version specification in the twine file @@ -244,6 +244,20 @@ def validate_output_values(self, **kwargs): self._validate_against_schema("output_values", data) return data + def validate_input_manifest(self, **kwargs): + """ Validates the input manifest, passed as either a file or a json string + """ + data = self._load_json("input_manifest", 
**kwargs) + self._validate_against_schema("input_manifest", data) + return data + + def validate_output_manifest(self, **kwargs): + """ Validates the output manifest, passed as either a file or a json string + """ + data = self._load_json("output_manifest", **kwargs) + self._validate_against_schema("output_manifest", data) + return data + # def validate( # self, # configuration=None,