diff --git a/README.md b/README.md index cc5989f..d548ba2 100644 --- a/README.md +++ b/README.md @@ -5,3 +5,53 @@ A library to help digital twins talk to one another. Read more at [twined.readth [![codecov](https://codecov.io/gh/octue/twined/branch/master/graph/badge.svg)](https://codecov.io/gh/octue/twined) [![Documentation Status](https://readthedocs.org/projects/twined/badge/?version=latest)](https://twined.readthedocs.io/en/latest/?badge=latest) +## Developer notes + +**You don't need to pay attention to this unless you plan to develop Twined.** + +### Contributing + +- Please raise an issue on the board (or add your $0.02 to an existing issue) so the maintainers know +what's happening and can advise / steer you. + +- Create a fork of twined, undertake your changes on a new branch (call it whatever you want). + +- Ask the `twined` maintainers *where* to make your pull request. We'll create a version branch, according to the +roadmap, into which you can make your PR. We'll help review the changes and improve the PR. + +- Once checks have passed, test coverage of the new code is >=95%, documentation is updated and the Review is passed, we'll merge into the version branch. + +- Once all the roadmapped features for that version are done, we'll release. + + +### Release process + +The process for creating a new release is as follows: + +1. Check out a branch for the next version, called `vX.Y.Z` +2. Create a Pull Request into the `master` branch. +3. Undertake your changes, committing and pushing to branch `vX.Y.Z` +4. Ensure that documentation is updated to match changes, and increment the changelog. **Pull requests which do not update documentation will be refused.** +5. Ensure that test coverage is sufficient. **Pull requests that decrease test coverage will be refused.** +6. Ensure code meets style guidelines (flake8 tests will fail otherwise) +7. Address Review Comments on the PR +8. Ensure the version in `setup.py` is correct and matches the branch version. 
+9. Merge to master. Successful test, doc build, flake8 and a new version number will automatically create the release on pypi. +10. Go to code > releases and create a new release on GitHub at the same SHA. + + +### Building documents locally + +**You don't need to do this unless you plan to develop Twined.** + +- Install `doxygen`. On a mac, that's `brew install doxygen`; other systems may differ. + +- Install sphinx and other requirements for building the docs +``` +pip install -r docs/requirements.txt +``` + +- Run the build process +``` +sphinx-build -b html docs/source docs/build +``` \ No newline at end of file diff --git a/docs/source/examples.rst b/docs/source/examples.rst index 20cb638..7bd0957 100644 --- a/docs/source/examples.rst +++ b/docs/source/examples.rst @@ -4,21 +4,95 @@ Examples ======== -Here, we look at example use cases for the library, and show how to use it in python. Many of these are +Here, we look at example use cases for the library, and show how to use it in python. + +It's also well worth looking at the unit test cases. Many of these examples are copied straight from the unit test cases, so you can always check there to see how everything hooks up. -.. _example_schema: +.. _example_equipment_installation_cost: -Example Schema -============== +[Simple] Equipment installation cost +==================================== .. tabs:: - .. code-tab:: py + .. group-tab:: Scenario + + You need to provide your team with an estimate for installation cost of an equipment foundation. + + It's a straightforward calculation for you, but the Logistics Team keeps changing the installation position, to + try and optimise the overall project logistics. + + Each time the locations change, the GIS team gives you an updated embedment depth, which is what you use + (along with steel cost and foundation type), to calculate cost and report it back.
+ + This twine allows you to create a wrapper around your scripts that communicates to the GIS team what you + need as an input, and communicates to the logistics team what they can expect as an output. + + When deployed as a digital twin, the calculation gets automatically updated, leaving you free to get on with + all the other work! + + .. group-tab:: Twine + + We specify the ``steel_cost`` and ``foundation_type`` as ``configuration`` values, which you can set on startup of the twin. + + Once the twin is running, it requires the ``embedment_depth`` as an ``input_value`` from the GIS team. A member + of the GIS team can use your twin to get ``foundation_cost`` directly. - import numpy as np - import es + .. code-block:: javascript - def main(): - pass + { + "title": "Foundation Cost Model", + "description": "This twine helps compute the cost of an installed foundation.", + "children": [ + ], + "configuration_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "Foundation cost twin configuration", + "description": "Set config parameters and constants at startup of the twin.", + "type": "object", + "properties": { + "steel_cost": { + "description": "The cost of steel in GBP/m^3. To get a better predictive model, you could add an economic twin that forecasts the cost of steel using the project timetable.", + "type": "number", + "minimum": 0, + "default": 3000 + }, + "foundation_type": { + "description": "The type of foundation being used.", + "type": "string", + "pattern": "^(monopile|twisted-jacket)$", + "default": "monopile" + } + } + }, + "input_values_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "Input Values schema for the foundation cost twin", + "description": "These values are supplied to the twin asynchronously over a web socket.
So as these values change, the twin can reply with an update.", + "type": "object", + "properties": { + "embedment_depth": { + "description": "Embedment depth in metres", + "type": "number", + "minimum": 10, + "maximum": 500 + } + } + }, + "output_manifest": [ + ], + "output_values_schema": { + "title": "Output Values schema for the foundation cost twin", + "description": "The response supplied to a change in input values will always conform to this schema.", + "type": "object", + "properties": { + "foundation_cost": { + "description": "The foundation cost.", + "type": "integer", + "minimum": 2 + } + } + } + } diff --git a/docs/source/images/digital_twin_component_for_simulation.svg b/docs/source/images/digital_twin_component_for_simulation.svg new file mode 100644 index 0000000..7d47cc6 --- /dev/null +++ b/docs/source/images/digital_twin_component_for_simulation.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/source/images/digital_twin_hierarchy_extended.svg b/docs/source/images/digital_twin_hierarchy_extended.svg new file mode 100644 index 0000000..24f415d --- /dev/null +++ b/docs/source/images/digital_twin_hierarchy_extended.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index 8574b0c..f35c0a4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -24,7 +24,12 @@ Twined A digital twin is a virtual representation of a real life being - a physical asset like a wind turbine or car - or even a human. Like real things, digital twins need to interact, so can be connected together, but need a common communication -framework to do so. This is what is provided by **twined**. +framework to do so. + +**twined** helps you to define a single file, a "twine", that defines a digital twin, specifying its data +interfaces, connections to other twins, and other requirements. + +Any person, or any computer, can read a twine and understand *what-goes-in* and *what-comes-out*. .. 
figure:: images/digital_twin_hierarchy.svg :width: 350px @@ -41,7 +46,7 @@ framework to do so. This is what is provided by **twined**. Aims ==== -**twined** provides a toolkit to help create and validate ":ref:`schema`" - descriptions of a digital twin, what data it +**twined** provides a toolkit to help create and validate "twines" - descriptions of a digital twin, what data it requires, what it does and how it works. The goals of **twined** are as follows: @@ -50,8 +55,9 @@ The goals of **twined** are as follows: - Provide functions to check that a schema itself is valid - Provide (or direct you to) tools to create schema describing what you require -Using :ref:`schema`, we can describe how digital twins connect and interact... building them together in hierarchies and -networks. +In :ref:`schema`, we describe the different parts of a twine (examining how digital twins connect and interact... +building them together in hierarchies and networks). But you may prefer to dive straight in with the :ref:`quick_start` +guide. The scope of **twined** is not large. Many other libraries will deal with hosting and deploying digital twins, still more will deal with the actual analyses done within them. **twined** purely deals with parsing and checking the @@ -107,12 +113,12 @@ and are willing to consider sponsorship of development and maintenance of that l .. toctree:: :maxdepth: 2 - :hidden: self - digital_twins - schema installation + quick_start examples + digital_twins + schema license version_history diff --git a/docs/source/quick_start.rst b/docs/source/quick_start.rst new file mode 100644 index 0000000..546ddaa --- /dev/null +++ b/docs/source/quick_start.rst @@ -0,0 +1,95 @@ +.. _quick_start: + +============ +Quick Start +============ + +.. _create_a_twine: + +Create your first twine +======================= + +Let's say we want a digital twin that accepts two values, uses them to make a calculation, then gives the result. 
Anyone connecting to the twin will need to know what values it requires, and what it responds with. + +First, create a blank text file, call it `twine.json`. We'll give the twin a title and description. +Paste in the following: + +.. code-block:: javascript + + { + "title": "My first digital twin... of an atomising discombobulator", + "description": "A simple example... estimates the `foz` value of an atomising discombobulator." + } + +Now, let's define an input values strand, to specify what values are required by the twin. For this we use a json schema +(you can read more about them in :ref:`introducing_json_schema`). Add the ``input_values_schema`` field, so your twine looks like this: + +.. code-block:: javascript + + { + "title": "My first digital twin", + "description": "A simple example to build on...", + "input_values_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "Input Values schema for my first digital twin", + "description": "These values are supplied to the twin by another program (often over a websocket, depending on your integration provider). So as these values change, the twin can reply with an update.", + "type": "object", + "properties": { + "foo": { + "description": "The foo value... speed of the discombobulator's input bobulation module, in m/s", + "type": "number", + "minimum": 10, + "maximum": 500 + }, + "baz": { + "description": "The baz value... period of the discombobulator's recombulation unit, in s", + "type": "number", + "minimum": 0, + "maximum": 1000 + } + } + } + } + +Finally, let's define an output values strand, to define what kind of data is returned by the twin: + +.. code-block:: javascript + + "output_values_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "Output Values schema for my first digital twin", + "description": "The twin will output data that matches this schema", + "type": "object", + "properties": { + "foz": { + "description": "Estimate of the foz value...
efficiency of the discombobulator in %", + "type": "number", + "minimum": 10, + "maximum": 500 + } + } + } + + +.. _load_the_twine: + +Load the twine +============== + +**twined** provides a `Twine()` class to load a twine (from a file or a json string). +The loading process checks the twine is valid. It's as simple as: + +.. code-block:: py + + from twined import Twine + + my_twine = Twine(file='twine.json') + + +.. _validate_some_inputs: + +Validate some inputs +==================== + +.. ATTENTION:: + LIBRARY IS UNDER CONSTRUCTION! WATCH THIS SPACE! \ No newline at end of file diff --git a/docs/source/schema.rst b/docs/source/schema.rst index 7930c92..278c07e 100644 --- a/docs/source/schema.rst +++ b/docs/source/schema.rst @@ -1,10 +1,13 @@ .. _schema: -====== -Schema -====== +===================== +About Twines (Schema) +===================== -This is the core of **twined**, whose whole purpose is to provide and use schemas for digital twins.. +The core of **twined** is to provide and use schemas for digital twins. + +Below, we set out requirements and a framework for creating a *schema* to represent a digital twin. +We call these schema "twines". To just get started building a **twine**, check out the :ref:`quick_start`. ..
_requirements: diff --git a/setup.py b/setup.py index 8c7d9c6..ba13f1e 100644 --- a/setup.py +++ b/setup.py @@ -15,9 +15,9 @@ setup( name='twined', - version='0.0.4', + version='0.0.5', py_modules=[], - install_requires=[], + install_requires=['jsonschema ~= 3.2.0'], url='https://www.github.com/octue/twined', license=license_text, author='Octue (github: octue)', diff --git a/tests/data/empty_app/twine.json b/tests/data/empty_app/twine.json new file mode 100644 index 0000000..39b9e14 --- /dev/null +++ b/tests/data/empty_app/twine.json @@ -0,0 +1,33 @@ +{ + "children": [ + ], + "configuration_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "The example configuration form", + "description": "The configuration strand of an example twine", + "type": "object", + "properties": { + } + }, + "credentials": [ + ], + "input_manifest": [ + ], + "input_values_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "Input Values", + "description": "The input values strand of an example twine", + "type": "object", + "properties": { + } + }, + "output_manifest": [ + ], + "output_values_schema": { + "title": "Output Values", + "description": "The output values strand of an example twine", + "type": "object", + "properties": { + } + } +} diff --git a/tests/data/example_app/input/config.json b/tests/data/example_app/input/config.json new file mode 100644 index 0000000..9a2c60b --- /dev/null +++ b/tests/data/example_app/input/config.json @@ -0,0 +1,3 @@ +{ + "n_iterations": 16 +} \ No newline at end of file diff --git a/tests/data/example_app/input/manifest.json b/tests/data/example_app/input/manifest.json new file mode 100644 index 0000000..a994280 --- /dev/null +++ b/tests/data/example_app/input/manifest.json @@ -0,0 +1,43 @@ +{ + "id": "8ead7669-8162-4f64-8cd5-4abe92509e17", + "type": "input", + "datasets": [ + { + "id": "7ead7669-8162-4f64-8cd5-4abe92509e17", + "name": "my meteorological dataset", + "tags": "met, mast, wind", + 
"files": [ + { + "path": "input/datasets/7ead7669/file_1.csv", + "cluster": 0, + "sequence": 0, + "extension": "csv", + "tags": "", + "posix_timestamp": null, + "data_file": { + "id": "abff07bc-7c19-4ed5-be6d-a6546eae8e86", + "last_modified": "2019-02-28T22:40:30.533005Z", + "name": "file_1.csv", + "size_bytes": 59684813, + "sha-512/256": "somesha" + } + }, + { + "path": "input/datasets/7ead7669/file_2.csv", + "cluster": 0, + "sequence": 1, + "extension": "csv", + "tags": "", + "posix_timestamp": null, + "data_file": { + "id": "bbff07bc-7c19-4ed5-be6d-a6546eae8e45", + "last_modified": "2019-02-28T22:40:40.633001Z", + "name": "file_2.csv", + "size_bytes": 59684813, + "sha-512/256": "someothersha" + } + } + ] + } + ] +} diff --git a/tests/data/example_app/twine.json b/tests/data/example_app/twine.json new file mode 100644 index 0000000..394833d --- /dev/null +++ b/tests/data/example_app/twine.json @@ -0,0 +1,78 @@ +{ + "children": [ + { + "key": "turbines", + "purpose": "wind turbines in a farm", + "filters": "tags:(met* AND mast AND location) files:(extension:csv AND sequence:>=0) location:10" + } + ], + "configuration_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "The example configuration form", + "description": "The configuration strand of an example twine", + "type": "object", + "properties": { + "n_iterations": { + "description": "An example of an integer configuration variable, called 'n_iterations'.", + "type": "integer", + "minimum": 1, + "maximum": 10, + "default": 5 + } + } + }, + "credentials": [ + { + "name": "MYAPI_SECRET_KEY", + "purpose": "Token for accessing the MyApi service" + }, + { + "name": "MY_DATABASE_URI", + "purpose": "A URI for accessing an external database from within a twin or analysis" + } + ], + "input_manifest": [ + { + "key": "met_mast_data", + "purpose": "A dataset containing meteorological mast data", + "filters": "tags:(met* AND mast AND location) files:(extension:csv AND sequence:>=0) location:10" + 
}, + { + "key": "scada_data", + "purpose": "A dataset containing scada data", + "filters": "tags:(met* AND mast) files:(extension:csv AND sequence:>=0) location:10" + } + ], + "input_values_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "Input Values", + "description": "The input values strand of an example twine", + "type": "object", + "properties": { + "width": { + "description": "An example of an integer value called 'width'", + "type": "integer", + "minimum": 2 + } + } + }, + "output_manifest": [ + { + "key": "production_data", + "purpose": "A dataset containing production data", + "tags": "production, wind" + } + ], + "output_values_schema": { + "title": "Output Values", + "description": "The output values strand of an example twine", + "type": "object", + "properties": { + "width": { + "description": "An example of an integer value called 'result'", + "type": "integer", + "minimum": 2 + } + } + } +} diff --git a/tests/data/simple_app/input/config.json b/tests/data/simple_app/input/config.json new file mode 100644 index 0000000..59a6167 --- /dev/null +++ b/tests/data/simple_app/input/config.json @@ -0,0 +1,9 @@ +{ + "width": 600, + "height": 600, + "max_iterations": 16, + "color_scale": "YlGnBu", + "x_range": [-1.5, 0.6], + "y_range": [-1.26, 1.26], + "type": "png" +} \ No newline at end of file diff --git a/tests/data/simple_app/twine.json b/tests/data/simple_app/twine.json new file mode 100644 index 0000000..8b86eac --- /dev/null +++ b/tests/data/simple_app/twine.json @@ -0,0 +1,72 @@ +{ + "configuration_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "Configuration for a simple app", + "description": "The app creates a mandelbrot plot", + "type": "object", + "properties": { + "width": { + "description": "Number of pixels the image contains in the x direction", + "type": "integer", + "minimum": 2, + "default": 600 + }, + "height": { + "description": "Number of pixels the image contains in the y 
direction", + "type": "integer", + "minimum": 2, + "default": 600 + }, + "max_iterations": { + "description": "Maximum number of iterations used to render each pixel", + "type": "integer", + "minimum": 2, + "default": 64 + }, + "color_scale": { + "description": "The colour scale string to use when mapping colours. See https://plot.ly/ipython-notebooks/color-scales/ for valid scales", + "type": "string", + "enum": ["PuBu", "YlGnBu"], + "default": "YlGnBu" + }, + "type": { + "description": "Type (jpeg or png) of the image that will be produced as a results file", + "type": "string", + "enum": ["jpg", "png"], + "default": "png" + }, + "x_range": { + "description": "The x_min to x_max range of space in which to render the fractal", + "type": "array", + "items": [{ + "type": "number" + }, + { + "type": "number" + }, + { + "type": "number" + }], + "additionalItems": false, + "default": [-1.5, 0.6] + }, + "y_range": { + "description": "The y_min to y_max range of space in which to render the fractal", + "type": "array", + "items": [{ + "type": "number" + }, + { + "type": "number" + }, + { + "type": "number" + }], + "additionalItems": false, + "default": [-1.26, 1.26] + } + } + }, + "output_manifest": [ + ] +} diff --git a/tests/data/twines/invalid_children_dict_not_array_twine.json b/tests/data/twines/invalid_children_dict_not_array_twine.json new file mode 100644 index 0000000..66245b2 --- /dev/null +++ b/tests/data/twines/invalid_children_dict_not_array_twine.json @@ -0,0 +1,3 @@ +{ + "children": {} +} diff --git a/tests/data/twines/invalid_children_no_key_twine.json b/tests/data/twines/invalid_children_no_key_twine.json new file mode 100644 index 0000000..1515fd3 --- /dev/null +++ b/tests/data/twines/invalid_children_no_key_twine.json @@ -0,0 +1,9 @@ +{ + "children": [ + { + "purpose": "Something to do with GIS data.", + "notes": "This filter (which can use the extremely powerful 'lucene' query syntax)\n allows the digital twin to locate other digital twins (public 
across octue or\n private in your workspace) which can provide the data you need.", + "filters": "tags:gis" + } + ] +} diff --git a/tests/data/twines/invalid_credentials_dict_not_array_twine.json b/tests/data/twines/invalid_credentials_dict_not_array_twine.json new file mode 100644 index 0000000..eb4f55a --- /dev/null +++ b/tests/data/twines/invalid_credentials_dict_not_array_twine.json @@ -0,0 +1,6 @@ +{ + "credentials": { + "name": "MY_API_SECRET_KEY", + "purpose": "Token for accessing a 3rd party API service" + } +} diff --git a/tests/data/twines/invalid_credentials_lowercase_name_twine.json b/tests/data/twines/invalid_credentials_lowercase_name_twine.json new file mode 100644 index 0000000..30afae5 --- /dev/null +++ b/tests/data/twines/invalid_credentials_lowercase_name_twine.json @@ -0,0 +1,8 @@ +{ + "credentials": [ + { + "name": "my_secrets_should_be_uppercase", + "purpose": "Token for accessing a 3rd party API service" + } + ] +} diff --git a/tests/data/twines/invalid_credentials_no_name_twine.json b/tests/data/twines/invalid_credentials_no_name_twine.json new file mode 100644 index 0000000..f809163 --- /dev/null +++ b/tests/data/twines/invalid_credentials_no_name_twine.json @@ -0,0 +1,7 @@ +{ + "credentials": [ + { + "purpose": "credentials without a name should be invalid" + } + ] +} diff --git a/tests/data/twines/invalid_credentials_space_in_name_twine.json b/tests/data/twines/invalid_credentials_space_in_name_twine.json new file mode 100644 index 0000000..522cd7c --- /dev/null +++ b/tests/data/twines/invalid_credentials_space_in_name_twine.json @@ -0,0 +1,8 @@ +{ + "credentials": [ + { + "name": "MY NAME SHOULD NOT HAVE WHITESPACE", + "purpose": "Token for accessing a 3rd party API service" + } + ] +} diff --git a/tests/data/twines/invalid_json_twine.json b/tests/data/twines/invalid_json_twine.json new file mode 100644 index 0000000..636e70e --- /dev/null +++ b/tests/data/twines/invalid_json_twine.json @@ -0,0 +1,11 @@ +{ + "children": [ + 
"configuration_schema": { + "$schema": "http://json-schema.org/2019-09/schema#", + "title": "The example configuration form", + "description": "The configuration strand of an example twine", + "type": "object", + "properties": { + } + }, +} diff --git a/tests/data/twines/valid_children_twine.json b/tests/data/twines/valid_children_twine.json new file mode 100644 index 0000000..2293ec6 --- /dev/null +++ b/tests/data/twines/valid_children_twine.json @@ -0,0 +1,10 @@ +{ + "children": [ + { + "key": "gis", + "purpose": "Something to do with GIS data.", + "notes": "Some internal note about how the filters work or similar.", + "filters": "tags:gis" + } + ] +} diff --git a/tests/test_children.py b/tests/test_children.py new file mode 100644 index 0000000..83ad5e7 --- /dev/null +++ b/tests/test_children.py @@ -0,0 +1,60 @@ +import unittest +from twined import Twine, exceptions + + +class TestChildrenTwine(unittest.TestCase): + """ Tests related to the twine itself - ensuring that valid and invalid + `children` entries in a twine file work as expected + """ + + def test_invalid_children_dict_not_array(self): + """ Ensures InvalidTwine exceptions are raised when instantiating twines where `children` entry is incorrectly + specified as a dict, not an array + """ + twine_file = 'data/twines/invalid_children_dict_not_array_twine.json' + with self.assertRaises(exceptions.InvalidTwine): + Twine(file=twine_file) + + def test_invalid_children_no_key(self): + """ Ensures InvalidTwine exceptions are raised when instantiating twines where a child + is specified without the required `key` field + """ + twine_file = 'data/twines/invalid_children_no_key_twine.json' + with self.assertRaises(exceptions.InvalidTwine): + Twine(file=twine_file) + + def test_valid_children(self): + """ Ensures that a twine can be instantiated with correctly specified children + """ + twine_file = 'data/twines/valid_children_twine.json' + twine = Twine(file=twine_file) + 
self.assertEqual(len(twine._raw['children']), 1) + + +class TestChildrenValidation(unittest.TestCase): + """ Tests related to whether validation of children occurs successfully (given a valid twine) + """ + + def test_no_children(self): + """ Test that a twine with no children will validate on an empty children input + """ + raise exceptions.NotImplementedYet() + + def test_missing_children(self): + """ Test that a twine with children will not validate on an empty children input + """ + raise exceptions.NotImplementedYet() + + def test_extra_children(self): + """ Test that a twine with no children will not validate a non-empty children input + """ + raise exceptions.NotImplementedYet() + + def test_matched_children(self): + """ Test that a twine with children required will validate when the children input matches + """ + raise exceptions.NotImplementedYet() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_credentials.py b/tests/test_credentials.py new file mode 100644 index 0000000..27931e9 --- /dev/null +++ b/tests/test_credentials.py @@ -0,0 +1,62 @@ +import unittest +from twined import Twine, exceptions + + +class TestCredentialsTwine(unittest.TestCase): + """ Tests related to the twine itself - ensuring that valid and invalid + `credentials` entries in a twine file work as expected + """ + + def test_fails_on_no_name(self): + """ Ensures InvalidTwine exceptions are raised when instantiating twines + with a missing `name` field in a credential + """ + twine_file = 'data/twines/invalid_credentials_no_name_twine.json' + with self.assertRaises(exceptions.InvalidTwine): + Twine(file=twine_file) + + def test_fails_on_lowercase_name(self): + """ Ensures InvalidTwine exceptions are raised when instantiating twines + with lowercase letters in the `name` field + """ + twine_file = 'data/twines/invalid_credentials_lowercase_name_twine.json' + with self.assertRaises(exceptions.InvalidTwine): + Twine(file=twine_file) + + def 
test_fails_on_dict(self): + """ Ensures InvalidTwine exceptions are raised when instantiating twines + with invalid `credentials` entries (given as a dict, not an array) + """ + twine_file = 'data/twines/invalid_credentials_dict_not_array_twine.json' + with self.assertRaises(exceptions.InvalidTwine): + Twine(file=twine_file) + + def test_fails_on_name_whitespace(self): + twine_file = 'data/twines/invalid_credentials_space_in_name_twine.json' + with self.assertRaises(exceptions.InvalidTwine): + Twine(file=twine_file) + + +class TestCredentialsValidation(unittest.TestCase): + """ Tests related to whether validation of credentials occurs successfully (given a valid twine) + """ + + def test_no_credentials(self): + """ Test that a twine with no credentials will validate straightforwardly + """ + raise exceptions.NotImplementedYet() + + def test_missing_credentials(self): + """ Test that a twine with credentials will not validate where they are missing from the environment + """ + raise exceptions.NotImplementedYet() + + def test_matched_credentials(self): + """ Test that a twine with credentials required will validate when the credentials are available in the + environment + """ + raise exceptions.NotImplementedYet() + + +if __name__ == '__main__': + unittest.main() diff --git a/tests/test_schema.py b/tests/test_schema.py deleted file mode 100644 index ef84cfa..0000000 --- a/tests/test_schema.py +++ /dev/null @@ -1,17 +0,0 @@ -import unittest - - -class TestSchema(unittest.TestCase): - - def setUp(self): - pass - - def tearDown(self): - pass - - def test_nothing(self): - pass - - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_twine.py b/tests/test_twine.py new file mode 100644 index 0000000..fa1beba --- /dev/null +++ b/tests/test_twine.py @@ -0,0 +1,83 @@ +import unittest +from twined import Twine, exceptions + + +class TestTwine(unittest.TestCase): + + def test_init_twine_with_filename(self): + """ Ensures that the twine class can be instantiated
with a file + """ + twine_file = 'data/simple_app/twine.json' + Twine(file=twine_file) + + def test_init_twine_with_json(self): + """ Ensures that a twine can be instantiated with a json string + """ + with open('data/simple_app/twine.json', 'r', encoding='utf-8') as f: + json_string = f.read() + Twine(json=json_string) + + def test_init_twine_with_incorrect_file_string(self): + """ Ensures that error is raised instantiating with a non-filename + """ + with self.assertRaises(exceptions.MissingTwine): + Twine(file='{"mistakenly_passed": "json instead of filename"}') + + def test_init_twine_with_both_inputs(self): + """ Ensures that error is raised when attempting to instantiate with both file and json inputs + """ + with self.assertRaises(exceptions.InvalidInput): + Twine( + file='data/simple_app/twine.json', + json='{"input_values": "something"}' + ) + + def test_missing_twine_file(self): + """ Ensures that an absent file raises a MissingTwine exception + """ + twine_file = 'file_is_missing.json' + with self.assertRaises(exceptions.MissingTwine): + Twine(file=twine_file) + + def test_no_twine(self): + """ Tests that the canonical-but-useless case of no twine provided validates whilst issuing a warning + """ + with self.assertLogs(level='WARNING') as log: + Twine() + self.assertEqual(len(log.output), 1) + self.assertEqual(len(log.records), 1) + self.assertIn('No twine file specified', log.output[0]) + + def test_empty_twine(self): + """ Ensures that an empty twine file can be loaded + """ + twine_file = 'data/empty_app/twine.json' + with self.assertLogs(level='DEBUG') as log: + Twine(file=twine_file) + self.assertEqual(len(log.output), 2) + self.assertEqual(len(log.records), 2) + self.assertIn('Loaded', log.output[0]) + self.assertIn('Validated', log.output[1]) + + def test_example_twine(self): + """ Ensures that the example (full) twine can be loaded and validated + """ + twine_file = 'data/example_app/twine.json' + Twine(file=twine_file) + + def 
test_simple_twine(self): + """ Ensures that the simple app schema can be loaded and used to parse some basic config and values data + """ + twine_file = 'data/simple_app/twine.json' + Twine(file=twine_file) + + def test_broken_json_twine(self): + """ Ensures that an invalid json file raises an InvalidTwine exception + """ + twine_file = 'data/twines/invalid_json_twine.json' + with self.assertRaises(exceptions.InvalidTwine): + Twine(file=twine_file) + + +if __name__ == '__main__': + unittest.main() diff --git a/tox.ini b/tox.ini index 73449e6..f204def 100644 --- a/tox.ini +++ b/tox.ini @@ -5,7 +5,7 @@ envlist = {py36,py37},py36-flake8 setenv = PYTHONPATH = {toxinidir}:{toxinidir}/twined commands = - coverage run --source twined tests/test_schema.py + coverage run --source twined -m unittest discover coverage report --show-missing deps = jsonschema==3.0.2 diff --git a/twined/__init__.py b/twined/__init__.py index e69de29..7b18929 100644 --- a/twined/__init__.py +++ b/twined/__init__.py @@ -0,0 +1 @@ +from .twine import Twine # noqa: F401 diff --git a/twined/exceptions.py b/twined/exceptions.py new file mode 100644 index 0000000..06361f2 --- /dev/null +++ b/twined/exceptions.py @@ -0,0 +1,49 @@ + + +class TwineException(Exception): + """ All exceptions raised by the twine framework inherits from TwineException""" + + +class InvalidTwine(TwineException): + """ Raised when the specified twine is invalid + """ + + +class MissingTwine(TwineException): + """ Raised when the specified twine file is not present + """ + + +class InvalidInput(TwineException): + """ Raised when an object is instantiated or a function called with invalid inputs + """ + + +class FolderNotPresent(InvalidInput): + """ Raised when a required folder (e.g. 
/input) cannot be found + """ + + +class ManifestNotFound(InvalidInput): + """ Raised when a multi manifest cannot be refined to a single manifest in a search + """ + + +class InvalidManifest(InvalidInput): + """ Raised when a manifest loaded from JSON does not pass validation + """ + + +class InvalidManifestType(InvalidManifest): + """ Raised when user attempts to create a manifest of a type other than 'input', 'output' or 'build' + """ + + +class NotImplementedYet(TwineException): + """ Raised when you attempt to use a function whose high-level API is in place, but which is not implemented yet + """ + + +class UnexpectedNumberOfResults(TwineException): + """ Raised when searching for a single data file (or a particular number of data files) and the number of results exceeds that expected + """ diff --git a/twined/schema/twine_schema.json b/twined/schema/twine_schema.json new file mode 100644 index 0000000..cf659f7 --- /dev/null +++ b/twined/schema/twine_schema.json @@ -0,0 +1,109 @@ +{ + "type": "object", + "$schema": "http://json-schema.org/2019-09/schema#", + "properties": { + "children": { + "type": "array", + "items": { + "type": "object", + "properties": { + "key": { + "description": "A textual key identifying a group of child twins", + "type": "string" + }, + "purpose": { + "description": "What this group of child twins are used for", + "type": "string", + "default": "" + }, + "filters": { + "description": "A search term, using the Lucene Query Language, which can be used to automatically refine the list of available child twins down to ones suitable for use here.", + "type": "string", + "default": "" + } + }, + "required": [ + "key" + ] + } + }, + "configuration_schema": { + "type": "object", + "required": ["properties"] + }, + "credentials": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "description": "The credential name, in upper snake case, eg 'MYAPI_SECRET_KEY'", + "type": "string", + "pattern": 
"^[A-Z]+(?:_[A-Z]+)*$" + }, + "purpose": { + "description": "What this credential is used for, eg 'Token for accessing the MyApi service'", + "type": "string" + } + }, + "required": [ + "name" + ] + } + }, + "input_manifest": { + "type": "array", + "description": "A list of entries, each describing a dataset that should be attached to / made available to the digital twin", + "items": { + "type": "object", + "properties": { + "key": { + "description": "A textual key identifying this dataset within the application/twin", + "type": "string" + }, + "purpose": { + "description": "What data this dataset contains, eg 'the set of data files from the energy production calculation process'", + "type": "string", + "default": "" + }, + "filters": { + "description": "A search term, using the Lucene Query Language, which can be used to automatically refine the list of available datasets down to ones suitable for use with this twin", + "type": "string", + "default": "" + } + }, + "required": ["key"] + } + }, + "input_values_schema": { + "type": "object" + }, + "output_manifest": { + "type": "array", + "description": "A list of entries, each describing a dataset that may be created/updated when the twin is run", + "items": { + "type": "object", + "properties": { + "key": { + "description": "A textual key identifying this dataset within the application/twin", + "type": "string" + }, + "purpose": { + "description": "What data this dataset contains, eg 'the set of data files from the energy production calculation process'", + "type": "string", + "default": "" + }, + "tags": { + "description": "Comma separated tags that will be applied to the dataset when created", + "type": "string", + "default": "" + } + }, + "required": ["key"] + } + }, + "output_values_schema": { + "type": "object" + } + } +} diff --git a/twined/twine.py b/twined/twine.py new file mode 100644 index 0000000..1d5c752 --- /dev/null +++ b/twined/twine.py @@ -0,0 +1,80 @@ +import json as jsonlib +import logging +import 
pkg_resources
from . import exceptions
from jsonschema import validate as jsonschema_validate, ValidationError


logger = logging.getLogger(__name__)


class Twine:
    """ Loads a twine from a file or a json string and checks it against the packaged twine schema

    The decoded twine is stored on the instance as ``_raw``.
    """

    def __init__(self, **kwargs):
        """ Instantiate a twine class, providing a file name or a utf-8 encoded string containing valid json.
        The twine is itself validated to be correct against the twine schema.

        Note: Instantiating the twine does not validate that any inputs to an application are correct - it merely
        checks that the twine itself is correct.

        :param file: (keyword, optional) name of a *.json file containing the twine
        :param json: (keyword, optional) string containing the twine as json
        :raises exceptions.InvalidInput: if both ``file`` and ``json`` are given
        :raises exceptions.MissingTwine: if ``file`` cannot be found
        :raises exceptions.InvalidTwine: if the twine is not decodable json, or does not match the twine schema
        """
        self._load_twine(**kwargs)

    def _load_twine(self, file=None, json=None):
        """ Load twine from a *.json file or a json string and validate its contents

        With neither input given, an empty twine is stored and a warning is logged; no schema validation is run
        in that case.

        :param file: name of a *.json file containing the twine, or None
        :param json: string containing the twine as json, or None
        :raises exceptions.InvalidInput: if both ``file`` and ``json`` are given
        :raises exceptions.MissingTwine: if ``file`` cannot be found
        :raises exceptions.InvalidTwine: if the twine is not decodable json, or does not match the twine schema
        """

        # Default twine with nothing in it
        if (file is None) and (json is None):
            self._raw = {}
            logger.warning('No twine file specified. Loading empty twine.')
            return

        if (file is not None) and (json is not None):
            raise exceptions.InvalidInput('You cannot specify both file and json inputs')

        # Only the decoding of the twine itself is guarded here, so that a JSONDecodeError reported as an
        # InvalidTwine can only ever refer to the user's twine (and never to the packaged schema)
        try:
            if file is not None:
                # From the file...
                try:
                    # Twines are utf-8 encoded; do not rely on the platform's default encoding
                    with open(file, encoding='utf-8') as f:
                        self._raw = jsonlib.load(f)
                except FileNotFoundError as e:
                    raise exceptions.MissingTwine(e) from e
                logger.debug('Loaded twine from file %s', file)

            else:
                # Directly from the string...
                self._raw = jsonlib.loads(json)
                logger.debug('Loaded twine from input json string')

        except jsonlib.decoder.JSONDecodeError as e:
            raise exceptions.InvalidTwine(e) from e

        self._validate_twine()

    def _validate_twine(self):
        """ Validate that the loaded twine contains all required parts and that each part is valid.

        A twine *contains* schema, but we also need to verify that it matches a certain schema itself.

        :raises exceptions.InvalidTwine: if the raw twine does not conform to the twine schema
        """
        # The twine schema is read from the 'schema/twine_schema.json' resource of the 'twined' package
        twine_schema = jsonlib.loads(pkg_resources.resource_string('twined', 'schema/twine_schema.json'))

        try:
            jsonschema_validate(instance=self._raw, schema=twine_schema)
        except ValidationError as e:
            raise exceptions.InvalidTwine(e.message) from e

        logger.debug('Validated raw twine against schema')

    def validate(
        self,
        configuration=None,
        manifest=None,
        credentials=None,
        monitors=None,
        logs=None,
    ):
        """ Placeholder: accepts its arguments and currently does nothing with them

        NOTE(review): no validation is performed here yet, so a normal return does NOT mean the given inputs are
        valid. Presumably each argument is to be checked against the corresponding part of the twine - confirm
        intended behaviour before relying on this method.
        """
        pass