diff --git a/dmci/api/app.py b/dmci/api/app.py
index 3fcbefb..bf0f57b 100644
--- a/dmci/api/app.py
+++ b/dmci/api/app.py
@@ -25,7 +25,7 @@
import uuid
from lxml import etree
-from flask import request, Flask, after_this_request
+from flask import request, Flask
from dmci.api.worker import Worker
@@ -55,7 +55,7 @@ def __init__(self):
# Set up api entry points
@self.route("/v1/insert", methods=["POST"])
- def base():
+ def post_insert():
max_permitted_size = self._conf.max_permitted_size
if request.content_length > max_permitted_size:
@@ -63,25 +63,32 @@ def base():
data = request.get_data()
+ # Cache the job file
file_uuid = uuid.uuid4()
path = self._conf.distributor_cache
full_path = os.path.join(path, f"{file_uuid}.Q")
+ msg, code = self._persist_file(data, full_path)
+ if code != 200:
+ return msg, code
+ # Run the validator
worker = Worker(full_path, self._xsd_obj)
-
- @after_this_request
- def dist(response):
- nonlocal worker
- worker.distribute()
- return response
-
- result, msg = worker.validate(data)
- if result:
- return self._persist_file(data, full_path)
+ valid, msg = worker.validate(data)
+ if not valid:
+ return msg, 400
+
+ # Run the distributors
+ err = []
+ status, valid, _, failed, skipped = worker.distribute()
+ if not status:
+ err.append("The following distributors failed: %s" % ", ".join(failed))
+ if not valid:
+ err.append("The following jobs were skipped: %s" % ", ".join(skipped))
+
+ if err:
+ return "\n".join(err), 500
else:
- return msg, 500
-
- # TODO: shouldn't msg be logged?
+ return "Everything is OK", 200
return
@@ -99,7 +106,7 @@ def _persist_file(data, full_path):
except Exception as e:
logger.error(str(e))
- return "Can't write to file", 507
+ return "Cannot write xml data to cache file", 507
return "", 200
diff --git a/dmci/api/worker.py b/dmci/api/worker.py
index 7a3bd21..4b14238 100644
--- a/dmci/api/worker.py
+++ b/dmci/api/worker.py
@@ -24,7 +24,7 @@
from lxml import etree
from dmci import CONFIG
-from dmci.mmd_tools import full_check
+from dmci.tools import CheckMMD
from dmci.distributors import FileDist, PyCSWDist
logger = logging.getLogger(__name__)
@@ -111,13 +111,15 @@ def distribute(self):
**self._kwargs
)
valid &= obj.is_valid()
- if obj.is_valid:
+ if obj.is_valid():
obj_status = obj.run()
status &= obj_status
if obj_status:
called.append(dist)
else:
failed.append(dist)
+ else:
+ skipped.append(dist)
return status, valid, called, failed, skipped
@@ -139,11 +141,15 @@ def _check_information_content(self, data):
# Check XML file
logger.info("Performing in depth checking.")
- valid = full_check(xml_doc)
+ checker = CheckMMD()
+ valid = checker.full_check(xml_doc)
if valid:
msg = "Input MMD XML file is ok"
else:
+ _, _, err = checker.status()
msg = "Input MMD XML file contains errors, please check your file"
+ if err:
+ msg += "\n" + "\n".join(err)
return valid, msg
diff --git a/dmci/mmd_tools/check_mmd.py b/dmci/mmd_tools/check_mmd.py
deleted file mode 100644
index 1184fa3..0000000
--- a/dmci/mmd_tools/check_mmd.py
+++ /dev/null
@@ -1,237 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-DMCI : MMD Checker Functions
-============================
-
-Copyright 2021 MET Norway
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import logging
-import pythesint as pti
-
-from lxml import etree
-from urllib.parse import urlparse
-
-logger = logging.getLogger(__name__)
-
-def check_rectangle(rectangle):
- """Check if element geographic extent/rectangle is valid:
- - only 1 existing rectangle element
- - rectangle has north / south / west / east subelements
- - -180 <= min_lat <= max_lat <= 180
- - 0 <= min_lon <= max_lon <= 360
- Args:
- rectangle: list of elements found when requesting node(s) geographic_extent/rectangle
- (output of ET request findall)
- Returns:
- True / False
- """
- directions = dict.fromkeys(['north', 'south', 'west', 'east'], None)
-
- ok = True
- if len(rectangle) > 1:
- logger.debug("NOK: Multiple rectangle elements in file.")
- return False
-
- for child in rectangle[0]:
- # Also removes namespace, if any
- child_ns = etree.QName(child)
- directions[child_ns.localname] = float(child.text)
-
- for key, val in directions.items():
- if val is None:
- logger.error("NOK: Missing rectangle element %s" % key)
- return False
-
- if not (-180 <= directions['west'] <= directions['east'] <= 180):
- logger.debug("NOK: Longitudes not ok")
- ok = False
- if not (-90 <= directions['south'] <= directions['north'] <= 90):
- logger.debug("NOK: Latitudes not ok")
- ok = False
- if not ok:
- logger.debug(directions)
-
- return ok
-
-def check_url(url, allow_no_path=False):
- """Check that an URL is valid.
- """
- try:
- parsed = urlparse(url)
- if parsed.scheme not in ("http", "https", "ftp", "sftp"):
- logger.debug(f"NOK: {url}")
- logger.debug("URL scheme not allowed")
- return False
-
- if not (parsed.netloc and "." in parsed.netloc):
- logger.debug(f"NOK: {url}")
- logger.debug("No valid domain in URL")
- return False
-
- if not (parsed.path or allow_no_path):
- logger.debug(f"NOK: {url}")
- logger.debug("No path in URL")
- return False
-
- except Exception:
- logger.debug(f"NOK: {url}")
- logger.debug("URL cannot be parsed by urllib")
- return False
-
- try:
- url.encode("ascii")
- except Exception:
- logger.debug(f"NOK: {url}")
- logger.debug("URL contains non-ASCII characters")
- return False
-
- return True
-
-def check_cf(cf_names): # pragma: no cover
- """Check that names are valid CF standard names
- Args:
- cf_names: list of names to test
- Returns:
- True / False
- """
- ok = True
- for cf_name in cf_names:
- try:
- pti.get_cf_standard_name(cf_name)
- logger.debug(f'OK - {cf_name} is a CF standard name.')
- except IndexError:
- logger.debug(f'NOK - {cf_name} is not a CF standard name.')
- ok = False
-
- return ok
-
-def check_vocabulary(xmldoc): # pragma: no cover
- """Check controlled vocabularies for elements:
- - access_constraint
- - activity_type
- - operational_status (comment: also checked in MMD XSD schema)
- - use_constraint
- Args:
- xmldoc: ElementTree containing the full XML document
- Returns:
- True / False
-
- Comments: The following elements have test functions available in pythesint but are not used:
- - area -> because it does not correspond to an element in currently tested files
- - platform type -> because erroneous thesaurus in mmd repo?
- """
- vocabularies = {
- 'access_constraint': 'access_constraints', 'activity_type': 'activity_type',
- 'operational_status': 'operstatus', 'use_constraint': 'use_constraint_type',
- }
- ok = True
- for element_name, f_name in vocabularies.items():
- if f_name == 'use_constraint_type':
- elems_found = xmldoc.findall('./{*}' + element_name + '/{*}identifier')
- else:
- elems_found = xmldoc.findall('./{*}' + element_name)
-
- if len(elems_found) >= 1:
- for rep in elems_found:
- try:
- getattr(pti, 'get_mmd_'+f_name)(rep.text)
- logger.debug(
- f'OK - {rep.text} is correct vocabulary for element {element_name}.'
- )
- except IndexError:
- logger.debug(f'NOK - {rep.text} is not correct vocabulary for element'
- f' {element_name}. \n Accepted vocabularies are '
- f'{getattr(pti, "get_mmd_"+f_name+"_list")()}')
- ok = False
- else:
- logger.debug(f'Element {element_name} not present.')
-
- return ok
-
-def full_check(doc):
- """Main checking scripts for in depth checking of XML file.
- - checking URLs
- - checking lat-lon within geographic_extent/rectangle
- - checking CF names against standard table
- - checking controlled vocabularies (access_constraint /
- activity_type / operational_status / use_constraint)
-
- Args:
- doc: ElementTree containing the full XML document
- Returns:
- True / False
- """
- valid = True
-
- # Get elements with urls and check for OK response
- urls = doc.findall(".//{*}resource")
- if len(urls) > 0:
- logger.debug("Checking element(s) containing URL ...")
- urls_ok = all([check_url(elem.text) for elem in urls])
- if urls_ok:
- logger.info("OK: %d URLs" % len(urls))
- else:
- logger.info("NOK: URLs - check debug log")
- valid &= urls_ok
- else:
- logger.debug("Found no elements contained an URL")
-
- # If there is an element geographic_extent/rectangle, check that lat/lon are valid
- rectangle = doc.findall("./{*}geographic_extent/{*}rectangle")
- if len(rectangle) > 0:
- logger.debug("Checking element geographic_extent/rectangle ...")
- rect_ok = check_rectangle(rectangle)
- if rect_ok:
- logger.info("OK: geographic_extent/rectangle")
- else:
- logger.info("NOK: geographic_extent/rectangle - check debug log")
- valid &= rect_ok
- else:
- logger.debug("Found no geographic_extent/rectangle element")
-
- # Check that cf name provided exist in reference Standard Name Table
- # cf_elements = doc.findall('./{*}keywords[@vocabulary="Climate and Forecast Standard Names"]')
- # if len(cf_elements) == 1:
- # logger.debug('Checking elements keyword from vocabulary CF ...')
- # cf_list = [elem.text for elem in cf_elements[0]]
- # if len(cf_list) > 1:
- # logger.info(f'NOK - CF names -> only one CF name should be provided - {cf_list}')
- # valid = False
- # # Check CF names even if more than one provided
- # cf_ok = check_cf(cf_list)
- # if cf_ok:
- # logger.info('OK - CF names')
- # else:
- # logger.info('NOK - CF names -> check debug log')
- # valid &= cf_ok
- # elif len(cf_elements) > 1:
- # valid = False
- # logger.debug(
- # 'NOK - More than one element with keywords[@vocabulary="Climate and '
- # 'Forecast Standard Names"]'
- # )
- # else:
- # logger.debug('No CF standard names element.')
-
- # Check controlled vocabularies
- # voc_ok = check_vocabulary(doc)
- # valid &= voc_ok
- # if voc_ok:
- # logger.info('OK - Controlled vocabularies.')
- # else:
- # logger.info('NOK - Controlled vocabularies -> check debug log')
-
- return valid
diff --git a/dmci/mmd_tools/__init__.py b/dmci/tools/__init__.py
similarity index 91%
rename from dmci/mmd_tools/__init__.py
rename to dmci/tools/__init__.py
index ca0b546..d400c83 100644
--- a/dmci/mmd_tools/__init__.py
+++ b/dmci/tools/__init__.py
@@ -21,8 +21,8 @@
limitations under the License.
"""
-from dmci.mmd_tools.check_mmd import full_check
+from dmci.tools.check_mmd import CheckMMD
__all__ = [
- "full_check",
+ "CheckMMD",
]
diff --git a/dmci/tools/check_mmd.py b/dmci/tools/check_mmd.py
new file mode 100644
index 0000000..d2ac7ac
--- /dev/null
+++ b/dmci/tools/check_mmd.py
@@ -0,0 +1,282 @@
+# -*- coding: utf-8 -*-
+"""
+DMCI : MMD Checker Functions
+============================
+
+Copyright 2021 MET Norway
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import logging
+import pythesint as pti
+
+from lxml import etree
+from urllib.parse import urlparse
+
+logger = logging.getLogger(__name__)
+
+class CheckMMD:
+    """In-depth content checks for MMD XML documents, with a status report."""
+
+    def __init__(self):
+        self._status_pass = []
+        self._status_fail = []
+        self._status_ok = True
+        return
+
+    def clear(self):
+        """Clear the status data: no checks passed or failed, status ok.
+        """
+        self._status_pass = []
+        self._status_fail = []
+        self._status_ok = True
+        return
+
+    def status(self):
+        """Return (ok, passed, failed) for the checks run since last clear.
+        """
+        return self._status_ok, self._status_pass, self._status_fail
+
+    def check_rectangle(self, rectangle):
+        """Check if element geographic extent/rectangle is valid:
+            - only 1 existing rectangle element
+            - rectangle has north / south / west / east subelements
+            - -180 <= west <= east <= 180
+            - -90 <= south <= north <= 90
+        Args:
+            rectangle: list of elements found when requesting node(s)
+                geographic_extent/rectangle
+        Returns:
+            Tuple of (True / False, list of errors)
+        """
+        keys = ["north", "south", "west", "east"]
+        directions = dict.fromkeys(keys, None)
+
+        err = []
+
+        if len(rectangle) > 1:
+            err.append("Multiple rectangle elements in file.")
+
+        # Only the first rectangle is inspected, also when there are more
+        for child in rectangle[0]:
+            # Compare on the local name, so any namespace is ignored
+            tag = etree.QName(child).localname
+            if tag not in keys:
+                err.append("The element '%s' is not a valid rectangle element." % tag)
+            try:
+                directions[tag] = float(child.text)
+            except (TypeError, ValueError):
+                # TypeError covers a text of None, as for an empty element
+                err.append("Value of rectangle element '%s' is not a number." % tag)
+
+        # A direction is still None if it was absent or could not be read
+        for key, val in directions.items():
+            if val is None:
+                err.append("Missing rectangle element '%s'." % key)
+
+        if not err:
+            # Only check this if all values are successfully read
+            west, east = directions["west"], directions["east"]
+            if not (-180.0 <= west <= east <= 180.0):
+                err.append("Longitudes not in range -180 <= west <= east <= 180.")
+
+            south, north = directions["south"], directions["north"]
+            if not (-90.0 <= south <= north <= 90.0):
+                err.append("Latitudes not in range -90 <= south <= north <= 90.")
+
+        ok = not err
+        self._log_result("Rectangle Check", ok, err)
+
+        return ok, err
+
+    def check_url(self, url, allow_no_path=False):
+        """Check that an URL is valid:
+            - contains only ASCII characters
+            - scheme is one of http / https / ftp / sftp
+            - domain is present and contains a dot
+            - path is present, unless allow_no_path is True
+        Returns:
+            Tuple of (True / False, list of errors)
+        """
+        err = []
+        try:
+            url.encode("ascii")
+        except Exception:
+            err.append("URL contains non-ASCII characters.")
+
+        try:
+            parsed = urlparse(url)
+            if parsed.scheme not in ("http", "https", "ftp", "sftp"):
+                err.append("URL scheme '%s' not allowed." % parsed.scheme)
+
+            if not (parsed.netloc and "." in parsed.netloc):
+                err.append("Domain '%s' is not valid." % parsed.netloc)
+
+            if not (parsed.path or allow_no_path):
+                err.append("URL contains no path. At least '/' is required.")
+
+        except Exception:
+            err.append("URL cannot be parsed by urllib.")
+
+        ok = not err
+        self._log_result(f"URL Check on '{url}'", ok, err)
+        return ok, err
+
+    def check_cf(self, xmldoc):
+        """Check that names are valid CF standard names:
+            - at most one CF keywords element in the document
+            - only one keyword in that element
+            - every keyword is a CF standard name
+        Args:
+            xmldoc: ElementTree containing the full XML document
+        Returns:
+            Tuple of (True / False, list of errors, number of CF elements)
+        """
+        err = []
+
+        cf_elements = xmldoc.findall(
+            "./{*}keywords[@vocabulary=\"Climate and Forecast Standard Names\"]"
+        )
+        n_cf = len(cf_elements)
+        if n_cf == 1:
+            cf_list = [elem.text for elem in cf_elements[0]]
+            if len(cf_list) > 1:
+                err.append("Only one CF name should be provided, got %d." % len(cf_list))
+
+            # Check CF names even if more than one provided
+            for cf_name in cf_list:
+                try:
+                    pti.get_cf_standard_name(cf_name)
+                except IndexError:
+                    err.append("Keyword '%s' is not a CF standard name." % cf_name)
+
+        elif n_cf > 1:
+            err.append("More than one CF entry found. Only one is allowed.")
+
+        ok = not err
+        if n_cf > 0:
+            self._log_result("Climate and Forecast Standard Names Check", ok, err)
+
+        return ok, err, n_cf
+
+    # The following function needs to be reimplemented
+    def check_vocabulary(self, xmldoc):
+        """Check controlled vocabularies for elements:
+            - access_constraint
+            - activity_type
+            - operational_status
+            - use_constraint (its identifier subelement is checked)
+        Args:
+            xmldoc: ElementTree containing the full XML document
+        Returns:
+            Tuple of (True / False, list of errors)
+
+        Comments: The following elements have test functions available
+        in pythesint but are not used:
+            - area -> because it does not correspond to an element in
+                currently tested files
+            - platform type -> because erroneous thesaurus in mmd repo?
+        """
+        vocabularies = {
+            "access_constraint": pti.get_mmd_access_constraints,
+            "activity_type": pti.get_mmd_activity_type,
+            "operational_status": pti.get_mmd_operstatus,
+            "use_constraint": pti.get_mmd_use_constraint_type,
+        }
+        err = []
+        num = 0
+
+        for element_name, lookup in vocabularies.items():
+            # Match the element directly below the root, in any namespace
+            xpath = "./{*}" + element_name
+            if element_name == "use_constraint":
+                xpath += "/{*}identifier"
+
+            for rep in xmldoc.findall(xpath):
+                num += 1
+                # An IndexError from the lookup is reported as incorrect vocabulary
+                try:
+                    lookup(rep.text)
+                except IndexError:
+                    err.append("Incorrect vocabulary '%s' for element '%s'." % (
+                        rep.text, element_name
+                    ))
+
+        ok = not err
+        # Only report the check if at least one element was checked, so that
+        # documents without any of these elements get no entry in the status
+        if num > 0:
+            self._log_result("Controlled Vocabularies Check", ok, err)
+
+        return ok, err
+
+    def full_check(self, doc):
+        """Main checking scripts for in depth checking of XML file:
+            - checking URLs
+            - checking lat-lon within geographic_extent/rectangle
+            - checking CF names against standard table
+            - checking controlled vocabularies (access_constraint /
+                activity_type / operational_status / use_constraint)
+        The status data is cleared before the checks are run.
+        Args:
+            doc: ElementTree containing the full XML document
+        Returns:
+            True / False
+        """
+        self.clear()
+        valid = True
+
+        # Check the URL of every resource element
+        urls = doc.findall(".//{*}resource")
+        if urls:
+            logger.debug("Checking element(s) containing URL ...")
+        for elem in urls:
+            urls_ok, _ = self.check_url(elem.text)
+            valid &= urls_ok
+
+        # If there is an element geographic_extent/rectangle, check that lat/lon are valid
+        rectangle = doc.findall("./{*}geographic_extent/{*}rectangle")
+        if rectangle:
+            logger.debug("Checking element geographic_extent/rectangle ...")
+            rect_ok, _ = self.check_rectangle(rectangle)
+            valid &= rect_ok
+
+        # Check that cf name provided exist in reference Standard Name Table
+        cf_ok, _, _ = self.check_cf(doc)
+        valid &= cf_ok
+
+        # Check controlled vocabularies
+        voc_ok, _ = self.check_vocabulary(doc)
+        valid &= voc_ok
+
+        return valid
+
+    ##
+    #  Internal Functions
+    ##
+
+    def _log_result(self, check, ok, err):
+        """Write the result of a check to the status variables.
+        """
+        if ok:
+            self._status_pass.append("Passed: %s" % check)
+        else:
+            self._status_fail.append("Failed: %s" % check)
+            self._status_fail.extend(" - %s" % fail for fail in err)
+
+        self._status_ok &= ok
+
+        return
+
+# END Class CheckMMD
diff --git a/pytest.ini b/pytest.ini
index 503358c..5d050f7 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -3,5 +3,5 @@ markers =
core: Core functionality tests
dist: Distributor tests
api: Api functionality tests
- mmd_tools: MMD toolbox tests
+ tools: Toolbox tests
serial
diff --git a/tests/test_api/test_app.py b/tests/test_api/test_app.py
index c9aebb1..7441c0a 100644
--- a/tests/test_api/test_app.py
+++ b/tests/test_api/test_app.py
@@ -108,13 +108,33 @@ def testApiApp_InsertRequests(client, filesDir, monkeypatch):
tooLargeFile = bytes(3000000)
assert client.post("/v1/insert", data=tooLargeFile).status_code == 413
+ # Fail caching the file
+ with monkeypatch.context() as mp:
+ mp.setattr("builtins.open", causeOSError)
+ assert client.post("/v1/insert", data=MOCK_XML).status_code == 507
+
+ # Data is valid
with monkeypatch.context() as mp:
mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, ""))
assert client.post("/v1/insert", data=MOCK_XML).status_code == 200
+ # Data is not valid
with monkeypatch.context() as mp:
mp.setattr("dmci.api.app.Worker.validate", lambda *a: (False, ""))
- assert client.post("/v1/insert", data=MOCK_XML).status_code == 500
+ assert client.post("/v1/insert", data=MOCK_XML).status_code == 400
+
+ # Data is valid, distribute fails
+ with monkeypatch.context() as mp:
+ fail = ["A", "B"]
+ skip = ["C"]
+ mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, ""))
+ mp.setattr("dmci.api.app.Worker.distribute", lambda *a: (False, False, [], fail, skip))
+ response = client.post("/v1/insert", data=MOCK_XML)
+ assert response.status_code == 500
+ assert response.data == (
+ b"The following distributors failed: A, B\n"
+ b"The following jobs were skipped: C"
+ )
# END Test testApiApp_InsertRequests
diff --git a/tests/test_api/test_worker.py b/tests/test_api/test_worker.py
index 0774b5b..7043bcd 100644
--- a/tests/test_api/test_worker.py
+++ b/tests/test_api/test_worker.py
@@ -26,6 +26,7 @@
from dmci.api.worker import Worker
from dmci.distributors import FileDist, PyCSWDist
+from dmci.tools import CheckMMD
@pytest.mark.api
def testApiWorker_Init():
@@ -58,15 +59,34 @@ def testApiWorker_Distributor(tmpDir, tmpConf, mockXml, monkeypatch):
assert failed == []
assert skipped == ["blabla"]
+ # Same as above, but jobs fail
+ with monkeypatch.context() as mp:
+ mp.setattr(FileDist, "run", lambda *a: False)
+ mp.setattr(PyCSWDist, "run", lambda *a: False)
+
+ tstWorker = Worker(None, None)
+ tstWorker._conf = tmpConf
+ tstWorker._dist_xml_file = mockXml
+
+ status, valid, called, failed, skipped = tstWorker.distribute()
+ assert status is False
+ assert valid is True
+ assert called == []
+ assert failed == ["file", "pycsw"]
+ assert skipped == ["blabla"]
+
# Call the distributor function with the wrong parameters
tstWorker = Worker(None, None)
tstWorker._conf = tmpConf
tstWorker._dist_cmd = "blabla"
tstWorker._dist_xml_file = "/path/to/nowhere"
- status, valid, _, _, _ = tstWorker.distribute()
- assert status is False
- assert valid is False
+ status, valid, called, failed, skipped = tstWorker.distribute()
+ assert status is True # No jobs were run since all were skipped
+ assert valid is False # All jobs were invalid due to the command
+ assert called == []
+ assert failed == []
+ assert skipped == ["file", "pycsw", "blabla"]
# END Test testApiWorker_Distributor
@@ -119,7 +139,7 @@ def testApiWorker_CheckInfoContent(monkeypatch, filesDir):
# Valid data format
with monkeypatch.context() as mp:
- mp.setattr("dmci.mmd_tools.check_mmd.check_url", lambda *a, **k: True)
+ mp.setattr(CheckMMD, "check_url", lambda *a, **k: (True, []))
passData = bytes(readFile(passFile), "utf-8")
assert tstWorker._check_information_content(passData) == (
True, "Input MMD XML file is ok"
@@ -127,7 +147,7 @@ def testApiWorker_CheckInfoContent(monkeypatch, filesDir):
# Valid data format, invalid content
with monkeypatch.context() as mp:
- mp.setattr("dmci.mmd_tools.check_mmd.check_url", lambda *a, **k: False)
+ mp.setattr(CheckMMD, "check_url", lambda *a, **k: (False, []))
passData = bytes(readFile(passFile), "utf-8")
assert tstWorker._check_information_content(passData) == (
False, "Input MMD XML file contains errors, please check your file"
@@ -140,6 +160,32 @@ def testApiWorker_CheckInfoContent(monkeypatch, filesDir):
False, "Input MMD XML file has no valid UUID metadata_identifier"
)
+ # Check Error report
+ failFile = os.path.join(filesDir, "api", "failing.xml")
+ failData = (
+ b""
+ b" 00000000-0000-0000-0000-000000000000"
+ b" imap://met.no"
+ b" "
+ b" "
+ b" 76.199661"
+ b" 71.63427"
+ b" -28.114723"
+ b" "
+ b" "
+ b""
+ )
+ assert tstWorker._check_information_content(failData) == (
+ False, (
+ "Input MMD XML file contains errors, please check your file\n"
+ "Failed: URL Check on 'imap://met.no'\n"
+ " - URL scheme 'imap' not allowed.\n"
+ " - URL contains no path. At least '/' is required.\n"
+ "Failed: Rectangle Check\n"
+ " - Missing rectangle element 'east'.\n"
+ ).rstrip()
+ )
+
# END Test testApiWorker_CheckInfoContent
@pytest.mark.api
diff --git a/tests/test_mmd_tools/test_mmd_tools.py b/tests/test_mmd_tools/test_mmd_tools.py
index 88da474..7eeb05e 100644
--- a/tests/test_mmd_tools/test_mmd_tools.py
+++ b/tests/test_mmd_tools/test_mmd_tools.py
@@ -20,22 +20,20 @@
import os
import pytest
-import logging
from lxml import etree
-from dmci.mmd_tools.check_mmd import (
- check_rectangle, check_url, check_cf, check_vocabulary, full_check
-)
+from dmci.tools import CheckMMD
-@pytest.mark.mmd_tools
-def testMMDTools_CheckRectangle(caplog):
+@pytest.mark.tools
+def testMMDTools_CheckRectangle():
"""Test the check_rectangle function.
"""
- caplog.set_level(logging.DEBUG, logger="dmci")
+ chkMMD = CheckMMD()
+
+ # Check lat/lon OK from rectangle
etreeRef = etree.ElementTree(etree.XML(
""
- " https://www.met.no/"
" "
" "
" 76.199661"
@@ -46,127 +44,323 @@ def testMMDTools_CheckRectangle(caplog):
" "
""
))
+ rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle")
+ ok, err = chkMMD.check_rectangle(rect)
+ assert ok is True
+ assert err == []
- # Check lat/lon OK from rectangle
+ # Check direction missing
+ etreeRef = etree.ElementTree(etree.XML(
+ ""
+ " "
+ " "
+ " 76.199661"
+ " 71.63427"
+ " -28.114723"
+ " "
+ " "
+ ""
+ ))
rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle")
- assert check_rectangle(rect) is True
-
- # Check longitude NOK
- root = etree.Element("rectangle")
- etree.SubElement(root, "south").text = "20"
- etree.SubElement(root, "north").text = "50"
- etree.SubElement(root, "west").text = "50"
- etree.SubElement(root, "east").text = "0"
- assert check_rectangle([root]) is False
- assert "Longitudes not ok" in caplog.text
-
- # Check latitude NOK
- root = etree.Element("rectangle")
- etree.SubElement(root, "south").text = "-182"
- etree.SubElement(root, "north").text = "50"
- etree.SubElement(root, "west").text = "0"
- etree.SubElement(root, "east").text = "180"
- assert check_rectangle([root]) is False
- assert "Latitudes not ok" in caplog.text
+ ok, err = chkMMD.check_rectangle(rect)
+ assert ok is False
+ assert err == ["Missing rectangle element 'east'."]
+
+ # Check invalid longitude
+ etreeRef = etree.ElementTree(etree.XML(
+ ""
+ " "
+ " "
+ " 50"
+ " 20"
+ " 50"
+ " 0"
+ " "
+ " "
+ ""
+ ))
+ rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle")
+ ok, err = chkMMD.check_rectangle(rect)
+ assert ok is False
+ assert err == ["Longitudes not in range -180 <= west <= east <= 180."]
+
+ # Check invalid latitude
+ etreeRef = etree.ElementTree(etree.XML(
+ ""
+ " "
+ " "
+ " -182"
+ " 50"
+ " 0"
+ " 180"
+ " "
+ " "
+ ""
+ ))
+ rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle")
+ ok, err = chkMMD.check_rectangle(rect)
+ assert ok is False
+ assert err == ["Latitudes not in range -90 <= south <= north <= 90."]
# Check more than one rectangle as input
- assert check_rectangle(["elem1", "elem2"]) is False
- assert "Multiple rectangle elements in file" in caplog.text
+ elem = etree.Element("rectangle")
+ ok, err = chkMMD.check_rectangle([elem, elem])
+ assert ok is False
+ assert err[0] == "Multiple rectangle elements in file."
# Check lat & long OK with namespace
- root = etree.Element("rectangle")
- etree.SubElement(root, "{http://www.met.no/schema/mmd}south").text = "20"
- etree.SubElement(root, "{http://www.met.no/schema/mmd}north").text = "50"
- etree.SubElement(root, "{http://www.met.no/schema/mmd}west").text = "0"
- etree.SubElement(root, "{http://www.met.no/schema/mmd}east").text = "50"
- assert check_rectangle([root]) is True
-
- # Check rectangle with one missing element (no west)
- root = etree.Element("rectangle")
- etree.SubElement(root, "south").text = "-182"
- etree.SubElement(root, "north").text = "50"
- etree.SubElement(root, "east").text = "180"
- assert check_rectangle([root]) is False
- assert "Missing rectangle element west" in caplog.text
+ etreeRef = etree.ElementTree(etree.XML(
+ ""
+ " "
+ " "
+ " 76.199661"
+ " 71.63427"
+ " -28.114723"
+ " -11.169785"
+ " "
+ " "
+ ""
+ ))
+ rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle")
+ ok, err = chkMMD.check_rectangle(rect)
+ assert ok is True
+ assert err == []
+
+ # Check rectangle with element with typo
+ etreeRef = etree.ElementTree(etree.XML(
+ ""
+ " "
+ " "
+ " 76.199661"
+ " 71.63427"
+ " -28.114723"
+ " -11.169785"
+ " "
+ " "
+ ""
+ ))
+ rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle")
+ ok, err = chkMMD.check_rectangle(rect)
+ assert ok is False
+ assert err == [
+ "The element 'easttt' is not a valid rectangle element.",
+ "Missing rectangle element 'east'."
+ ]
+
+ # Check rectangle with non-numeric value
+ etreeRef = etree.ElementTree(etree.XML(
+ ""
+ " "
+ " "
+ " 76.199661"
+ " 71.63427"
+ " -28.114723"
+ " -stuff"
+ " "
+ " "
+ ""
+ ))
+ rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle")
+ ok, err = chkMMD.check_rectangle(rect)
+ assert ok is False
+ assert err == [
+ "Value of rectangle element 'east' is not a number.",
+ "Missing rectangle element 'east'."
+ ]
# END Test testMMDTools_CheckRectangle
-@pytest.mark.mmd_tools
+@pytest.mark.tools
def testMMDTools_CheckURLs():
"""Test the check_url function.
"""
+ chkMMD = CheckMMD()
+
# Valid URL
- assert check_url("https://www.met.no/") is True
+ ok, err = chkMMD.check_url("https://www.met.no/")
+ assert ok is True
+ assert err == []
# Schemes
- assert check_url("https://www.met.no/") is True
- assert check_url("http://www.met.no/") is True
- assert check_url("file://www.met.no/") is False
- assert check_url("imap://www.met.no/") is False
- assert check_url("stuff://www.met.no/") is False
+ ok, err = chkMMD.check_url("https://www.met.no/")
+ assert ok is True
+ assert err == []
+
+ ok, err = chkMMD.check_url("http://www.met.no/")
+ assert ok is True
+ assert err == []
+
+ ok, err = chkMMD.check_url("file://www.met.no/")
+ assert ok is False
+ assert err == ["URL scheme 'file' not allowed."]
+
+ ok, err = chkMMD.check_url("imap://www.met.no/")
+ assert ok is False
+ assert err == ["URL scheme 'imap' not allowed."]
+
+ ok, err = chkMMD.check_url("stuff://www.met.no/")
+ assert ok is False
+ assert err == ["URL scheme 'stuff' not allowed."]
# Domains
- assert check_url("https://www.met.no/") is True
- assert check_url("https://met.no/") is True
- assert check_url("https:/www.met.no/") is False
- assert check_url("https://metno/") is False
+ ok, err = chkMMD.check_url("https://www.met.no/")
+ assert ok is True
+ assert err == []
+
+ ok, err = chkMMD.check_url("https://met.no/")
+ assert ok is True
+ assert err == []
+
+ ok, err = chkMMD.check_url("https:/www.met.no/")
+ assert ok is False
+ assert err == ["Domain '' is not valid."]
+
+ ok, err = chkMMD.check_url("https://metno/")
+ assert ok is False
+ assert err == ["Domain 'metno' is not valid."]
# Path
- assert check_url("https://www.met.no", allow_no_path=True) is True
- assert check_url("https://www.met.no") is False
- assert check_url("https://www.met.no/") is True
- assert check_url("https://www.met.no/location") is True
+ ok, err = chkMMD.check_url("https://www.met.no", allow_no_path=True)
+ assert ok is True
+ assert err == []
+
+ ok, err = chkMMD.check_url("https://www.met.no")
+ assert ok is False
+ assert err == ["URL contains no path. At least '/' is required."]
+
+ ok, err = chkMMD.check_url("https://www.met.no/")
+ assert ok is True
+ assert err == []
+
+ ok, err = chkMMD.check_url("https://www.met.no/location")
+ assert ok is True
+ assert err == []
# Non-ASCII characters
- assert check_url("https://www.mæt.no/") is False
+ ok, err = chkMMD.check_url("https://www.mæt.no/")
+ assert ok is False
+ assert err == ["URL contains non-ASCII characters."]
# Unparsable
- assert check_url(12345) is False
+ ok, err = chkMMD.check_url(12345)
+ assert ok is False
+ assert err == ["URL contains non-ASCII characters.", "URL cannot be parsed by urllib."]
# END Test testMMDTools_CheckURLs
-@pytest.mark.mmd_tools
-def off_testMMDTools_CheckCF():
+@pytest.mark.tools
+def testMMDTools_CheckCF():
"""Test the check_cf function.
"""
- assert check_cf(["sea_surface_temperature"]) is True
- assert check_cf(["sea_surace_temperature"]) is False
+ chkMMD = CheckMMD()
+
+ ok, err, n = chkMMD.check_cf(etree.ElementTree(etree.XML(
+ ""
+ " "
+ " sea_surface_temperature"
+ " "
+ ""
+ )))
+ assert ok is True
+ assert err == []
+ assert n == 1
+
+ ok, err, n = chkMMD.check_cf(etree.ElementTree(etree.XML(
+ ""
+ " "
+ " sea_surface_temperature"
+ " sea_surface_temperature"
+ " "
+ ""
+ )))
+ assert ok is False
+ assert err == ["Only one CF name should be provided, got 2."]
+ assert n == 1
+
+ ok, err, n = chkMMD.check_cf(etree.ElementTree(etree.XML(
+ ""
+ " "
+ " sea_surace_temperature"
+ " "
+ ""
+ )))
+ assert ok is False
+ assert err == ["Keyword 'sea_surace_temperature' is not a CF standard name."]
+ assert n == 1
+
+ ok, err, n = chkMMD.check_cf(etree.ElementTree(etree.XML(
+ ""
+ " "
+ " sea_surface_temperature"
+ " "
+ " "
+ " sea_surface_temperature"
+ " "
+ ""
+ )))
+ assert ok is False
+ assert err == ["More than one CF entry found. Only one is allowed."]
+ assert n == 2
# END Test testMMDTools_CheckCF
-@pytest.mark.mmd_tools
-def off_testMMDTools_CheckVocabulary():
+@pytest.mark.tools
+def testMMDTools_CheckVocabulary():
"""Test the check_vocabulary function.
"""
- assert check_vocabulary(etree.ElementTree(etree.XML(
+ chkMMD = CheckMMD()
+ ok, err = chkMMD.check_vocabulary(etree.ElementTree(etree.XML(
"Operational"
- ))) is True
+ )))
+ assert ok is True
+ assert err == []
- assert check_vocabulary(etree.ElementTree(etree.XML(
+ ok, err = chkMMD.check_vocabulary(etree.ElementTree(etree.XML(
"OOperational"
- ))) is False
+ )))
+ assert ok is False
+ assert err == ["Incorrect vocabulary 'OOperational' for element 'operational_status'."]
# END Test testMMDTools_CheckVocabulary
-@pytest.mark.mmd_tools
-def testMMDTools_FullCheck(filesDir, caplog):
+@pytest.mark.tools
+def testMMDTools_FullCheck(filesDir):
"""Test the full_check function.
"""
- caplog.set_level(logging.DEBUG, logger="dmci")
+ chkMMD = CheckMMD()
passFile = os.path.join(filesDir, "api", "passing.xml")
passTree = etree.parse(passFile, parser=etree.XMLParser(remove_blank_text=True))
# Full check
- caplog.clear()
- assert full_check(passTree) is True
- assert "OK: 9 URLs" in caplog.text
- assert "OK: geographic_extent/rectangle" in caplog.text
+ assert chkMMD.full_check(passTree) is True
+ ok, passed, failed = chkMMD.status()
+ assert ok is True
+ assert failed == []
+ assert "\n".join(passed) == (
+ "Passed: URL Check on 'https://gcmdservices.gsfc.nasa.gov/static/kms/'\n"
+ "Passed: URL Check on 'http://inspire.ec.europa.eu/theme'\n"
+ "Passed: URL Check on 'https://register.geonorge.no/subregister/metadata-kodelister/kartve"
+ "rket/nasjonal-temainndeling'\n"
+ "Passed: URL Check on 'http://spdx.org/licenses/CC-BY-4.0'\n"
+ "Passed: URL Check on 'https://thredds.met.no/thredds/dodsC/remotesensingsatellite/polar-s"
+ "wath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc'\n"
+ "Passed: URL Check on 'https://thredds.met.no/thredds/wms/remotesensingsatellite/polar-swa"
+ "th/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc?service=WMS&version=1.3.0&r"
+ "equest=GetCapabilities'\n"
+ "Passed: URL Check on 'https://thredds.met.no/thredds/fileServer/remotesensingsatellite/po"
+ "lar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc'\n"
+ "Passed: URL Check on 'https://www.wmo-sat.info/oscar/satellites/view/aqua'\n"
+ "Passed: URL Check on 'https://www.wmo-sat.info/oscar/instruments/view/modis'\n"
+ "Passed: Rectangle Check\n"
+ "Passed: Controlled Vocabularies Check\n"
+ ).rstrip()
# Full check with no elements to check
- caplog.clear()
- assert full_check(etree.ElementTree(etree.XML(""))) is True
- assert "Found no elements contained an URL" in caplog.text
- assert "Found no geographic_extent/rectangle element" in caplog.text
+ assert chkMMD.full_check(etree.ElementTree(etree.XML(""))) is True
+ ok, passed, failed = chkMMD.status()
+ assert ok is True
+ assert passed == []
+ assert failed == []
# Full check with invalid elements
etreeUrlRectNok = etree.ElementTree(etree.XML(
@@ -186,30 +380,20 @@ def testMMDTools_FullCheck(filesDir, caplog):
" NotOpen"
""
))
- caplog.clear()
- assert full_check(etreeUrlRectNok) is False
- assert "NOK: URLs" in caplog.text
- assert "NOK: geographic_extent/rectangle" in caplog.text
-
- # Twice the element keywords for the same vocabulary
- # root = etree.Element("toto")
- # key1 = etree.SubElement(root, "keywords", vocabulary="Climate and Forecast Standard Names")
- # etree.SubElement(key1, "keyword").text = "air_temperature"
- # key2 = etree.SubElement(root, "keywords", vocabulary="Climate and Forecast Standard Names")
- # etree.SubElement(key2, "keyword").text = "air_temperature"
- # assert full_check(root) is False
-
- # Correct case
- # root = etree.Element("toto")
- # root1 = etree.SubElement(root, "keywords", vocabulary="Climate and Forecast Standard Names")
- # etree.SubElement(root1, "keyword").text = "sea_surface_temperature"
- # assert full_check(root) is True
-
- # Two standard names provided
- # root = etree.Element("toto")
- # root1 = etree.SubElement(root, "keywords", vocabulary="Climate and Forecast Standard Names")
- # etree.SubElement(root1, "keyword").text = "air_temperature"
- # etree.SubElement(root1, "keyword").text = "sea_surface_temperature"
- # assert full_check(root) is False
+ assert chkMMD.full_check(etreeUrlRectNok) is False
+ ok, passed, failed = chkMMD.status()
+ assert ok is False
+ assert passed == []
+ assert "\n".join(failed) == (
+ "Failed: URL Check on 'https://www.mæt.no/'\n"
+ " - URL contains non-ASCII characters.\n"
+ "Failed: Rectangle Check\n"
+ " - Missing rectangle element 'east'.\n"
+ "Failed: Climate and Forecast Standard Names Check\n"
+ " - Only one CF name should be provided, got 2.\n"
+ " - Keyword 'air_surface_temperature' is not a CF standard name.\n"
+ "Failed: Controlled Vocabularies Check\n"
+ " - Incorrect vocabulary 'NotOpen' for element 'operational_status'.\n"
+ ).rstrip()
# END Test testMMDTools_FullCheck