diff --git a/dmci/api/app.py b/dmci/api/app.py index 3fcbefb..bf0f57b 100644 --- a/dmci/api/app.py +++ b/dmci/api/app.py @@ -25,7 +25,7 @@ import uuid from lxml import etree -from flask import request, Flask, after_this_request +from flask import request, Flask from dmci.api.worker import Worker @@ -55,7 +55,7 @@ def __init__(self): # Set up api entry points @self.route("/v1/insert", methods=["POST"]) - def base(): + def post_insert(): max_permitted_size = self._conf.max_permitted_size if request.content_length > max_permitted_size: @@ -63,25 +63,32 @@ def base(): data = request.get_data() + # Cache the job file file_uuid = uuid.uuid4() path = self._conf.distributor_cache full_path = os.path.join(path, f"{file_uuid}.Q") + msg, code = self._persist_file(data, full_path) + if code != 200: + return msg, code + # Run the validator worker = Worker(full_path, self._xsd_obj) - - @after_this_request - def dist(response): - nonlocal worker - worker.distribute() - return response - - result, msg = worker.validate(data) - if result: - return self._persist_file(data, full_path) + valid, msg = worker.validate(data) + if not valid: + return msg, 400 + + # Run the distributors + err = [] + status, valid, _, failed, skipped = worker.distribute() + if not status: + err.append("The following distributors failed: %s" % ", ".join(failed)) + if not valid: + err.append("The following jobs were skipped: %s" % ", ".join(skipped)) + + if err: + return "\n".join(err), 500 else: - return msg, 500 - - # TODO: shouldn't msg be logged? 
+ return "Everything is OK", 200 return @@ -99,7 +106,7 @@ def _persist_file(data, full_path): except Exception as e: logger.error(str(e)) - return "Can't write to file", 507 + return "Cannot write xml data to cache file", 507 return "", 200 diff --git a/dmci/api/worker.py b/dmci/api/worker.py index 7a3bd21..4b14238 100644 --- a/dmci/api/worker.py +++ b/dmci/api/worker.py @@ -24,7 +24,7 @@ from lxml import etree from dmci import CONFIG -from dmci.mmd_tools import full_check +from dmci.tools import CheckMMD from dmci.distributors import FileDist, PyCSWDist logger = logging.getLogger(__name__) @@ -111,13 +111,15 @@ def distribute(self): **self._kwargs ) valid &= obj.is_valid() - if obj.is_valid: + if obj.is_valid(): obj_status = obj.run() status &= obj_status if obj_status: called.append(dist) else: failed.append(dist) + else: + skipped.append(dist) return status, valid, called, failed, skipped @@ -139,11 +141,15 @@ def _check_information_content(self, data): # Check XML file logger.info("Performing in depth checking.") - valid = full_check(xml_doc) + checker = CheckMMD() + valid = checker.full_check(xml_doc) if valid: msg = "Input MMD XML file is ok" else: + _, _, err = checker.status() msg = "Input MMD XML file contains errors, please check your file" + if err: + msg += "\n" + "\n".join(err) return valid, msg diff --git a/dmci/mmd_tools/check_mmd.py b/dmci/mmd_tools/check_mmd.py deleted file mode 100644 index 1184fa3..0000000 --- a/dmci/mmd_tools/check_mmd.py +++ /dev/null @@ -1,237 +0,0 @@ -# -*- coding: utf-8 -*- -""" -DMCI : MMD Checker Functions -============================ - -Copyright 2021 MET Norway - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import logging -import pythesint as pti - -from lxml import etree -from urllib.parse import urlparse - -logger = logging.getLogger(__name__) - -def check_rectangle(rectangle): - """Check if element geographic extent/rectangle is valid: - - only 1 existing rectangle element - - rectangle has north / south / west / east subelements - - -180 <= min_lat <= max_lat <= 180 - - 0 <= min_lon <= max_lon <= 360 - Args: - rectangle: list of elements found when requesting node(s) geographic_extent/rectangle - (output of ET request findall) - Returns: - True / False - """ - directions = dict.fromkeys(['north', 'south', 'west', 'east'], None) - - ok = True - if len(rectangle) > 1: - logger.debug("NOK: Multiple rectangle elements in file.") - return False - - for child in rectangle[0]: - # Also removes namespace, if any - child_ns = etree.QName(child) - directions[child_ns.localname] = float(child.text) - - for key, val in directions.items(): - if val is None: - logger.error("NOK: Missing rectangle element %s" % key) - return False - - if not (-180 <= directions['west'] <= directions['east'] <= 180): - logger.debug("NOK: Longitudes not ok") - ok = False - if not (-90 <= directions['south'] <= directions['north'] <= 90): - logger.debug("NOK: Latitudes not ok") - ok = False - if not ok: - logger.debug(directions) - - return ok - -def check_url(url, allow_no_path=False): - """Check that an URL is valid. 
- """ - try: - parsed = urlparse(url) - if parsed.scheme not in ("http", "https", "ftp", "sftp"): - logger.debug(f"NOK: {url}") - logger.debug("URL scheme not allowed") - return False - - if not (parsed.netloc and "." in parsed.netloc): - logger.debug(f"NOK: {url}") - logger.debug("No valid domain in URL") - return False - - if not (parsed.path or allow_no_path): - logger.debug(f"NOK: {url}") - logger.debug("No path in URL") - return False - - except Exception: - logger.debug(f"NOK: {url}") - logger.debug("URL cannot be parsed by urllib") - return False - - try: - url.encode("ascii") - except Exception: - logger.debug(f"NOK: {url}") - logger.debug("URL contains non-ASCII characters") - return False - - return True - -def check_cf(cf_names): # pragma: no cover - """Check that names are valid CF standard names - Args: - cf_names: list of names to test - Returns: - True / False - """ - ok = True - for cf_name in cf_names: - try: - pti.get_cf_standard_name(cf_name) - logger.debug(f'OK - {cf_name} is a CF standard name.') - except IndexError: - logger.debug(f'NOK - {cf_name} is not a CF standard name.') - ok = False - - return ok - -def check_vocabulary(xmldoc): # pragma: no cover - """Check controlled vocabularies for elements: - - access_constraint - - activity_type - - operational_status (comment: also checked in MMD XSD schema) - - use_constraint - Args: - xmldoc: ElementTree containing the full XML document - Returns: - True / False - - Comments: The following elements have test functions available in pythesint but are not used: - - area -> because it does not correspond to an element in currently tested files - - platform type -> because erroneous thesaurus in mmd repo? 
- """ - vocabularies = { - 'access_constraint': 'access_constraints', 'activity_type': 'activity_type', - 'operational_status': 'operstatus', 'use_constraint': 'use_constraint_type', - } - ok = True - for element_name, f_name in vocabularies.items(): - if f_name == 'use_constraint_type': - elems_found = xmldoc.findall('./{*}' + element_name + '/{*}identifier') - else: - elems_found = xmldoc.findall('./{*}' + element_name) - - if len(elems_found) >= 1: - for rep in elems_found: - try: - getattr(pti, 'get_mmd_'+f_name)(rep.text) - logger.debug( - f'OK - {rep.text} is correct vocabulary for element {element_name}.' - ) - except IndexError: - logger.debug(f'NOK - {rep.text} is not correct vocabulary for element' - f' {element_name}. \n Accepted vocabularies are ' - f'{getattr(pti, "get_mmd_"+f_name+"_list")()}') - ok = False - else: - logger.debug(f'Element {element_name} not present.') - - return ok - -def full_check(doc): - """Main checking scripts for in depth checking of XML file. - - checking URLs - - checking lat-lon within geographic_extent/rectangle - - checking CF names against standard table - - checking controlled vocabularies (access_constraint / - activity_type / operational_status / use_constraint) - - Args: - doc: ElementTree containing the full XML document - Returns: - True / False - """ - valid = True - - # Get elements with urls and check for OK response - urls = doc.findall(".//{*}resource") - if len(urls) > 0: - logger.debug("Checking element(s) containing URL ...") - urls_ok = all([check_url(elem.text) for elem in urls]) - if urls_ok: - logger.info("OK: %d URLs" % len(urls)) - else: - logger.info("NOK: URLs - check debug log") - valid &= urls_ok - else: - logger.debug("Found no elements contained an URL") - - # If there is an element geographic_extent/rectangle, check that lat/lon are valid - rectangle = doc.findall("./{*}geographic_extent/{*}rectangle") - if len(rectangle) > 0: - logger.debug("Checking element geographic_extent/rectangle ...") - 
rect_ok = check_rectangle(rectangle) - if rect_ok: - logger.info("OK: geographic_extent/rectangle") - else: - logger.info("NOK: geographic_extent/rectangle - check debug log") - valid &= rect_ok - else: - logger.debug("Found no geographic_extent/rectangle element") - - # Check that cf name provided exist in reference Standard Name Table - # cf_elements = doc.findall('./{*}keywords[@vocabulary="Climate and Forecast Standard Names"]') - # if len(cf_elements) == 1: - # logger.debug('Checking elements keyword from vocabulary CF ...') - # cf_list = [elem.text for elem in cf_elements[0]] - # if len(cf_list) > 1: - # logger.info(f'NOK - CF names -> only one CF name should be provided - {cf_list}') - # valid = False - # # Check CF names even if more than one provided - # cf_ok = check_cf(cf_list) - # if cf_ok: - # logger.info('OK - CF names') - # else: - # logger.info('NOK - CF names -> check debug log') - # valid &= cf_ok - # elif len(cf_elements) > 1: - # valid = False - # logger.debug( - # 'NOK - More than one element with keywords[@vocabulary="Climate and ' - # 'Forecast Standard Names"]' - # ) - # else: - # logger.debug('No CF standard names element.') - - # Check controlled vocabularies - # voc_ok = check_vocabulary(doc) - # valid &= voc_ok - # if voc_ok: - # logger.info('OK - Controlled vocabularies.') - # else: - # logger.info('NOK - Controlled vocabularies -> check debug log') - - return valid diff --git a/dmci/mmd_tools/__init__.py b/dmci/tools/__init__.py similarity index 91% rename from dmci/mmd_tools/__init__.py rename to dmci/tools/__init__.py index ca0b546..d400c83 100644 --- a/dmci/mmd_tools/__init__.py +++ b/dmci/tools/__init__.py @@ -21,8 +21,8 @@ limitations under the License. 
""" -from dmci.mmd_tools.check_mmd import full_check +from dmci.tools.check_mmd import CheckMMD __all__ = [ - "full_check", + "CheckMMD", ] diff --git a/dmci/tools/check_mmd.py b/dmci/tools/check_mmd.py new file mode 100644 index 0000000..d2ac7ac --- /dev/null +++ b/dmci/tools/check_mmd.py @@ -0,0 +1,282 @@ +# -*- coding: utf-8 -*- +""" +DMCI : MMD Checker Functions +============================ + +Copyright 2021 MET Norway + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import logging +import pythesint as pti + +from lxml import etree +from urllib.parse import urlparse + +logger = logging.getLogger(__name__) + +class CheckMMD(): + + def __init__(self): + self._status_pass = [] + self._status_fail = [] + self._status_ok = True + return + + def clear(self): + """Clear the status data. + """ + self._status_pass = [] + self._status_fail = [] + self._status_ok = True + return + + def status(self): + """Return the status of checks run since last clear. 
+ """ + return self._status_ok, self._status_pass, self._status_fail + + def check_rectangle(self, rectangle): + """Check if element geographic extent/rectangle is valid: + - only 1 existing rectangle element + - rectangle has north / south / west / east subelements + - -90 <= south <= north <= 90 + - -180 <= west <= east <= 180 + Args: + rectangle: list of elements found when requesting node(s) + geographic_extent/rectangle + Returns: + True / False + """ + keys = ["north", "south", "west", "east"] + directions = dict.fromkeys(keys, None) + + ok = True + err = [] + if len(rectangle) > 1: + err.append("Multiple rectangle elements in file.") + ok = False + + for child in rectangle[0]: + child_ns = etree.QName(child) + tag = child_ns.localname + if tag not in keys: + err.append("The element '%s' is not a valid rectangle element." % tag) + ok = False + try: + directions[tag] = float(child.text) + except ValueError: + err.append("Value of rectangle element '%s' is not a number." % tag) + ok = False + + for key, val in directions.items(): + if val is None: + err.append("Missing rectangle element '%s'." % key) + ok = False + + if ok: + # Only check this if all values are successfully read + if not (-180.0 <= directions["west"] <= directions["east"] <= 180.0): + err.append("Longitudes not in range -180 <= west <= east <= 180.") + ok = False + + if not (-90.0 <= directions["south"] <= directions["north"] <= 90.0): + err.append("Latitudes not in range -90 <= south <= north <= 90.") + ok = False + + self._log_result("Rectangle Check", ok, err) + + return ok, err + + def check_url(self, url, allow_no_path=False): + """Check that an URL is valid. + """ + ok = True + err = [] + try: + url.encode("ascii") + except Exception: + err.append("URL contains non-ASCII characters.") + ok = False + + try: + parsed = urlparse(url) + if parsed.scheme not in ("http", "https", "ftp", "sftp"): + err.append("URL scheme '%s' not allowed." 
% parsed.scheme) + ok = False + + if not (parsed.netloc and "." in parsed.netloc): + err.append("Domain '%s' is not valid." % parsed.netloc) + ok = False + + if not (parsed.path or allow_no_path): + err.append("URL contains no path. At least '/' is required.") + ok = False + + except Exception: + err.append("URL cannot be parsed by urllib.") + ok = False + + self._log_result(f"URL Check on '{url}'", ok, err) + + return ok, err + + def check_cf(self, xmldoc): + """Check that names are valid CF standard names + Args: + xmldoc: ElementTree containing the full XML document + Returns: + True / False, list of errors, number of CF keywords elements found + """ + ok = True + err = [] + + cf_elements = xmldoc.findall( + "./{*}keywords[@vocabulary=\"Climate and Forecast Standard Names\"]" + ) + n_cf = len(cf_elements) + if n_cf == 1: + cf_list = [elem.text for elem in cf_elements[0]] + if len(cf_list) > 1: + err.append("Only one CF name should be provided, got %d." % len(cf_list)) + ok = False + + # Check CF names even if more than one provided + for cf_name in cf_list: + try: + pti.get_cf_standard_name(cf_name) + except IndexError: + err.append("Keyword '%s' is not a CF standard name." % cf_name) + ok = False + + elif n_cf > 1: + err.append("More than one CF entry found. Only one is allowed.") + ok = False + + if n_cf > 0: + self._log_result("Climate and Forecast Standard Names Check", ok, err) + + return ok, err, n_cf + + # The following function needs to be reimplemented + def check_vocabulary(self, xmldoc): + """Check controlled vocabularies for elements: + - access_constraint + - activity_type + - operational_status + - use_constraint + Args: + xmldoc: ElementTree containing the full XML document + Returns: + True / False + List of errors + + Comments: The following elements have test functions available + in pythesint but are not used: + - area -> because it does not correspond to an element in + currently tested files + - platform type -> because erroneous thesaurus in mmd repo? 
+ """ + vocabularies = { + "access_constraint": pti.get_mmd_access_constraints, + "activity_type": pti.get_mmd_activity_type, + "operational_status": pti.get_mmd_operstatus, + "use_constraint": pti.get_mmd_use_constraint_type, + } + ok = True + err = [] + num = 0 + + for element_name, f_name in vocabularies.items(): + if element_name == "use_constraint": + elems_found = xmldoc.findall("./{*}" + element_name + "/{*}identifier") + else: + elems_found = xmldoc.findall("./{*}" + element_name) + + if len(elems_found) >= 1: + for rep in elems_found: + num += 1 + try: + f_name(rep.text) + except IndexError: + err.append("Incorrect vocabulary '%s' for element '%s'." % ( + rep.text, element_name + )) + ok = False + + if num > 0: + self._log_result("Controlled Vocabularies Check", ok, err) + + return ok, err + + def full_check(self, doc): + """Main checking scripts for in depth checking of XML file. + - checking URLs + - checking lat-lon within geographic_extent/rectangle + - checking CF names against standard table + - checking controlled vocabularies (access_constraint / + activity_type / operational_status / use_constraint) + + Args: + doc: ElementTree containing the full XML document + Returns: + True / False + """ + self.clear() + valid = True + + # Get elements with urls and check for OK response + urls = doc.findall(".//{*}resource") + if len(urls) > 0: + logger.debug("Checking element(s) containing URL ...") + for elem in urls: + urls_ok, _ = self.check_url(elem.text) + valid &= urls_ok + + # If there is an element geographic_extent/rectangle, check that lat/lon are valid + rectangle = doc.findall("./{*}geographic_extent/{*}rectangle") + if len(rectangle) > 0: + logger.debug("Checking element geographic_extent/rectangle ...") + rect_ok, _ = self.check_rectangle(rectangle) + valid &= rect_ok + + # Check that cf name provided exist in reference Standard Name Table + cf_ok, _, _ = self.check_cf(doc) + valid &= cf_ok + + # Check controlled vocabularies + voc_ok, _ = 
self.check_vocabulary(doc) + valid &= voc_ok + + return valid + + ## + # Internal Functions + ## + + def _log_result(self, check, ok, err): + """Write the result of a check to the status variables. + """ + if ok: + self._status_pass.append("Passed: %s" % check) + else: + self._status_fail.append("Failed: %s" % check) + for fail in err: + self._status_fail.append(" - %s" % fail) + + self._status_ok &= ok + + return + +# END Class CheckMMD diff --git a/pytest.ini b/pytest.ini index 503358c..5d050f7 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,5 +3,5 @@ markers = core: Core functionality tests dist: Distributor tests api: Api functionality tests - mmd_tools: MMD toolbox tests + tools: Toolbox tests serial diff --git a/tests/test_api/test_app.py b/tests/test_api/test_app.py index c9aebb1..7441c0a 100644 --- a/tests/test_api/test_app.py +++ b/tests/test_api/test_app.py @@ -108,13 +108,33 @@ def testApiApp_InsertRequests(client, filesDir, monkeypatch): tooLargeFile = bytes(3000000) assert client.post("/v1/insert", data=tooLargeFile).status_code == 413 + # Fail caching the file + with monkeypatch.context() as mp: + mp.setattr("builtins.open", causeOSError) + assert client.post("/v1/insert", data=MOCK_XML).status_code == 507 + + # Data is valid with monkeypatch.context() as mp: mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "")) assert client.post("/v1/insert", data=MOCK_XML).status_code == 200 + # Data is not valid with monkeypatch.context() as mp: mp.setattr("dmci.api.app.Worker.validate", lambda *a: (False, "")) - assert client.post("/v1/insert", data=MOCK_XML).status_code == 500 + assert client.post("/v1/insert", data=MOCK_XML).status_code == 400 + + # Data is valid, distribute fails + with monkeypatch.context() as mp: + fail = ["A", "B"] + skip = ["C"] + mp.setattr("dmci.api.app.Worker.validate", lambda *a: (True, "")) + mp.setattr("dmci.api.app.Worker.distribute", lambda *a: (False, False, [], fail, skip)) + response = client.post("/v1/insert", 
data=MOCK_XML) + assert response.status_code == 500 + assert response.data == ( + b"The following distributors failed: A, B\n" + b"The following jobs were skipped: C" + ) # END Test testApiApp_InsertRequests diff --git a/tests/test_api/test_worker.py b/tests/test_api/test_worker.py index 0774b5b..7043bcd 100644 --- a/tests/test_api/test_worker.py +++ b/tests/test_api/test_worker.py @@ -26,6 +26,7 @@ from dmci.api.worker import Worker from dmci.distributors import FileDist, PyCSWDist +from dmci.tools import CheckMMD @pytest.mark.api def testApiWorker_Init(): @@ -58,15 +59,34 @@ def testApiWorker_Distributor(tmpDir, tmpConf, mockXml, monkeypatch): assert failed == [] assert skipped == ["blabla"] + # Same as above, but jobs fail + with monkeypatch.context() as mp: + mp.setattr(FileDist, "run", lambda *a: False) + mp.setattr(PyCSWDist, "run", lambda *a: False) + + tstWorker = Worker(None, None) + tstWorker._conf = tmpConf + tstWorker._dist_xml_file = mockXml + + status, valid, called, failed, skipped = tstWorker.distribute() + assert status is False + assert valid is True + assert called == [] + assert failed == ["file", "pycsw"] + assert skipped == ["blabla"] + # Call the distributor function with the wrong parameters tstWorker = Worker(None, None) tstWorker._conf = tmpConf tstWorker._dist_cmd = "blabla" tstWorker._dist_xml_file = "/path/to/nowhere" - status, valid, _, _, _ = tstWorker.distribute() - assert status is False - assert valid is False + status, valid, called, failed, skipped = tstWorker.distribute() + assert status is True # No jobs were run since all were skipped + assert valid is False # All jobs were invalid due to the command + assert called == [] + assert failed == [] + assert skipped == ["file", "pycsw", "blabla"] # END Test testApiWorker_Distributor @@ -119,7 +139,7 @@ def testApiWorker_CheckInfoContent(monkeypatch, filesDir): # Valid data format with monkeypatch.context() as mp: - mp.setattr("dmci.mmd_tools.check_mmd.check_url", lambda *a, **k: 
True) + mp.setattr(CheckMMD, "check_url", lambda *a, **k: (True, [])) passData = bytes(readFile(passFile), "utf-8") assert tstWorker._check_information_content(passData) == ( True, "Input MMD XML file is ok" @@ -127,7 +147,7 @@ def testApiWorker_CheckInfoContent(monkeypatch, filesDir): # Valid data format, invalid content with monkeypatch.context() as mp: - mp.setattr("dmci.mmd_tools.check_mmd.check_url", lambda *a, **k: False) + mp.setattr(CheckMMD, "check_url", lambda *a, **k: (False, [])) passData = bytes(readFile(passFile), "utf-8") assert tstWorker._check_information_content(passData) == ( False, "Input MMD XML file contains errors, please check your file" @@ -140,6 +160,32 @@ def testApiWorker_CheckInfoContent(monkeypatch, filesDir): False, "Input MMD XML file has no valid UUID metadata_identifier" ) + # Check Error report + failFile = os.path.join(filesDir, "api", "failing.xml") + failData = ( + b"" + b" 00000000-0000-0000-0000-000000000000" + b" imap://met.no" + b" " + b" " + b" 76.199661" + b" 71.63427" + b" -28.114723" + b" " + b" " + b"" + ) + assert tstWorker._check_information_content(failData) == ( + False, ( + "Input MMD XML file contains errors, please check your file\n" + "Failed: URL Check on 'imap://met.no'\n" + " - URL scheme 'imap' not allowed.\n" + " - URL contains no path. 
At least '/' is required.\n" + "Failed: Rectangle Check\n" + " - Missing rectangle element 'east'.\n" + ).rstrip() + ) + # END Test testApiWorker_CheckInfoContent @pytest.mark.api diff --git a/tests/test_mmd_tools/test_mmd_tools.py b/tests/test_mmd_tools/test_mmd_tools.py index 88da474..7eeb05e 100644 --- a/tests/test_mmd_tools/test_mmd_tools.py +++ b/tests/test_mmd_tools/test_mmd_tools.py @@ -20,22 +20,20 @@ import os import pytest -import logging from lxml import etree -from dmci.mmd_tools.check_mmd import ( - check_rectangle, check_url, check_cf, check_vocabulary, full_check -) +from dmci.tools import CheckMMD -@pytest.mark.mmd_tools -def testMMDTools_CheckRectangle(caplog): +@pytest.mark.tools +def testMMDTools_CheckRectangle(): """Test the check_rectangle function. """ - caplog.set_level(logging.DEBUG, logger="dmci") + chkMMD = CheckMMD() + + # Check lat/lon OK from rectangle etreeRef = etree.ElementTree(etree.XML( "" - " https://www.met.no/" " " " " " 76.199661" @@ -46,127 +44,323 @@ def testMMDTools_CheckRectangle(caplog): " " "" )) + rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle") + ok, err = chkMMD.check_rectangle(rect) + assert ok is True + assert err == [] - # Check lat/lon OK from rectangle + # Check direction missing + etreeRef = etree.ElementTree(etree.XML( + "" + " " + " " + " 76.199661" + " 71.63427" + " -28.114723" + " " + " " + "" + )) rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle") - assert check_rectangle(rect) is True - - # Check longitude NOK - root = etree.Element("rectangle") - etree.SubElement(root, "south").text = "20" - etree.SubElement(root, "north").text = "50" - etree.SubElement(root, "west").text = "50" - etree.SubElement(root, "east").text = "0" - assert check_rectangle([root]) is False - assert "Longitudes not ok" in caplog.text - - # Check latitude NOK - root = etree.Element("rectangle") - etree.SubElement(root, "south").text = "-182" - etree.SubElement(root, "north").text = "50" - 
etree.SubElement(root, "west").text = "0" - etree.SubElement(root, "east").text = "180" - assert check_rectangle([root]) is False - assert "Latitudes not ok" in caplog.text + ok, err = chkMMD.check_rectangle(rect) + assert ok is False + assert err == ["Missing rectangle element 'east'."] + + # Check invalid longitude + etreeRef = etree.ElementTree(etree.XML( + "" + " " + " " + " 50" + " 20" + " 50" + " 0" + " " + " " + "" + )) + rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle") + ok, err = chkMMD.check_rectangle(rect) + assert ok is False + assert err == ["Longitudes not in range -180 <= west <= east <= 180."] + + # Check invalid latitude + etreeRef = etree.ElementTree(etree.XML( + "" + " " + " " + " -182" + " 50" + " 0" + " 180" + " " + " " + "" + )) + rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle") + ok, err = chkMMD.check_rectangle(rect) + assert ok is False + assert err == ["Latitudes not in range -90 <= south <= north <= 90."] + + # Check more than one rectangle as input - assert check_rectangle(["elem1", "elem2"]) is False - assert "Multiple rectangle elements in file" in caplog.text + elem = etree.Element("rectangle") + ok, err = chkMMD.check_rectangle([elem, elem]) + assert ok is False + assert err[0] == "Multiple rectangle elements in file." 
# Check lat & long OK with namespace - root = etree.Element("rectangle") - etree.SubElement(root, "{http://www.met.no/schema/mmd}south").text = "20" - etree.SubElement(root, "{http://www.met.no/schema/mmd}north").text = "50" - etree.SubElement(root, "{http://www.met.no/schema/mmd}west").text = "0" - etree.SubElement(root, "{http://www.met.no/schema/mmd}east").text = "50" - assert check_rectangle([root]) is True - - # Check rectangle with one missing element (no west) - root = etree.Element("rectangle") - etree.SubElement(root, "south").text = "-182" - etree.SubElement(root, "north").text = "50" - etree.SubElement(root, "east").text = "180" - assert check_rectangle([root]) is False - assert "Missing rectangle element west" in caplog.text + etreeRef = etree.ElementTree(etree.XML( + "" + " " + " " + " 76.199661" + " 71.63427" + " -28.114723" + " -11.169785" + " " + " " + "" + )) + rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle") + ok, err = chkMMD.check_rectangle(rect) + assert ok is True + assert err == [] + + # Check rectangle with element with typo + etreeRef = etree.ElementTree(etree.XML( + "" + " " + " " + " 76.199661" + " 71.63427" + " -28.114723" + " -11.169785" + " " + " " + "" + )) + rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle") + ok, err = chkMMD.check_rectangle(rect) + assert ok is False + assert err == [ + "The element 'easttt' is not a valid rectangle element.", + "Missing rectangle element 'east'." + ] + + # Check rectangle with non-numeric value + etreeRef = etree.ElementTree(etree.XML( + "" + " " + " " + " 76.199661" + " 71.63427" + " -28.114723" + " -stuff" + " " + " " + "" + )) + rect = etreeRef.findall("./{*}geographic_extent/{*}rectangle") + ok, err = chkMMD.check_rectangle(rect) + assert ok is False + assert err == [ + "Value of rectangle element 'east' is not a number.", + "Missing rectangle element 'east'." 
+ ] # END Test testMMDTools_CheckRectangle -@pytest.mark.mmd_tools +@pytest.mark.tools def testMMDTools_CheckURLs(): """Test the check_url function. """ + chkMMD = CheckMMD() + # Valid URL - assert check_url("https://www.met.no/") is True + ok, err = chkMMD.check_url("https://www.met.no/") + assert ok is True + assert err == [] # Schemes - assert check_url("https://www.met.no/") is True - assert check_url("http://www.met.no/") is True - assert check_url("file://www.met.no/") is False - assert check_url("imap://www.met.no/") is False - assert check_url("stuff://www.met.no/") is False + ok, err = chkMMD.check_url("https://www.met.no/") + assert ok is True + assert err == [] + + ok, err = chkMMD.check_url("http://www.met.no/") + assert ok is True + assert err == [] + + ok, err = chkMMD.check_url("file://www.met.no/") + assert ok is False + assert err == ["URL scheme 'file' not allowed."] + + ok, err = chkMMD.check_url("imap://www.met.no/") + assert ok is False + assert err == ["URL scheme 'imap' not allowed."] + + ok, err = chkMMD.check_url("stuff://www.met.no/") + assert ok is False + assert err == ["URL scheme 'stuff' not allowed."] # Domains - assert check_url("https://www.met.no/") is True - assert check_url("https://met.no/") is True - assert check_url("https:/www.met.no/") is False - assert check_url("https://metno/") is False + ok, err = chkMMD.check_url("https://www.met.no/") + assert ok is True + assert err == [] + + ok, err = chkMMD.check_url("https://met.no/") + assert ok is True + assert err == [] + + ok, err = chkMMD.check_url("https:/www.met.no/") + assert ok is False + assert err == ["Domain '' is not valid."] + + ok, err = chkMMD.check_url("https://metno/") + assert ok is False + assert err == ["Domain 'metno' is not valid."] # Path - assert check_url("https://www.met.no", allow_no_path=True) is True - assert check_url("https://www.met.no") is False - assert check_url("https://www.met.no/") is True - assert check_url("https://www.met.no/location") is 
True + ok, err = chkMMD.check_url("https://www.met.no", allow_no_path=True) + assert ok is True + assert err == [] + + ok, err = chkMMD.check_url("https://www.met.no") + assert ok is False + assert err == ["URL contains no path. At least '/' is required."] + + ok, err = chkMMD.check_url("https://www.met.no/") + assert ok is True + assert err == [] + + ok, err = chkMMD.check_url("https://www.met.no/location") + assert ok is True + assert err == [] # Non-ASCII characters - assert check_url("https://www.mæt.no/") is False + ok, err = chkMMD.check_url("https://www.mæt.no/") + assert ok is False + assert err == ["URL contains non-ASCII characters."] # Unparsable - assert check_url(12345) is False + ok, err = chkMMD.check_url(12345) + assert ok is False + assert err == ["URL contains non-ASCII characters.", "URL cannot be parsed by urllib."] # END Test testMMDTools_CheckURLs -@pytest.mark.mmd_tools -def off_testMMDTools_CheckCF(): +@pytest.mark.tools +def testMMDTools_CheckCF(): """Test the check_cf function. 
""" - assert check_cf(["sea_surface_temperature"]) is True - assert check_cf(["sea_surace_temperature"]) is False + chkMMD = CheckMMD() + + ok, err, n = chkMMD.check_cf(etree.ElementTree(etree.XML( + "" + " " + " sea_surface_temperature" + " " + "" + ))) + assert ok is True + assert err == [] + assert n == 1 + + ok, err, n = chkMMD.check_cf(etree.ElementTree(etree.XML( + "" + " " + " sea_surface_temperature" + " sea_surface_temperature" + " " + "" + ))) + assert ok is False + assert err == ["Only one CF name should be provided, got 2."] + assert n == 1 + + ok, err, n = chkMMD.check_cf(etree.ElementTree(etree.XML( + "" + " " + " sea_surace_temperature" + " " + "" + ))) + assert ok is False + assert err == ["Keyword 'sea_surace_temperature' is not a CF standard name."] + assert n == 1 + + ok, err, n = chkMMD.check_cf(etree.ElementTree(etree.XML( + "" + " " + " sea_surface_temperature" + " " + " " + " sea_surface_temperature" + " " + "" + ))) + assert ok is False + assert err == ["More than one CF entry found. Only one is allowed."] + assert n == 2 # END Test testMMDTools_CheckCF -@pytest.mark.mmd_tools -def off_testMMDTools_CheckVocabulary(): +@pytest.mark.tools +def testMMDTools_CheckVocabulary(): """Test the check_vocabulary function. """ - assert check_vocabulary(etree.ElementTree(etree.XML( + chkMMD = CheckMMD() + ok, err = chkMMD.check_vocabulary(etree.ElementTree(etree.XML( "Operational" - ))) is True + ))) + assert ok is True + assert err == [] - assert check_vocabulary(etree.ElementTree(etree.XML( + ok, err = chkMMD.check_vocabulary(etree.ElementTree(etree.XML( "OOperational" - ))) is False + ))) + assert ok is False + assert err == ["Incorrect vocabulary 'OOperational' for element 'operational_status'."] # END Test testMMDTools_CheckVocabulary -@pytest.mark.mmd_tools -def testMMDTools_FullCheck(filesDir, caplog): +@pytest.mark.tools +def testMMDTools_FullCheck(filesDir): """Test the full_check function. 
""" - caplog.set_level(logging.DEBUG, logger="dmci") + chkMMD = CheckMMD() passFile = os.path.join(filesDir, "api", "passing.xml") passTree = etree.parse(passFile, parser=etree.XMLParser(remove_blank_text=True)) # Full check - caplog.clear() - assert full_check(passTree) is True - assert "OK: 9 URLs" in caplog.text - assert "OK: geographic_extent/rectangle" in caplog.text + assert chkMMD.full_check(passTree) is True + ok, passed, failed = chkMMD.status() + assert ok is True + assert failed == [] + assert "\n".join(passed) == ( + "Passed: URL Check on 'https://gcmdservices.gsfc.nasa.gov/static/kms/'\n" + "Passed: URL Check on 'http://inspire.ec.europa.eu/theme'\n" + "Passed: URL Check on 'https://register.geonorge.no/subregister/metadata-kodelister/kartve" + "rket/nasjonal-temainndeling'\n" + "Passed: URL Check on 'http://spdx.org/licenses/CC-BY-4.0'\n" + "Passed: URL Check on 'https://thredds.met.no/thredds/dodsC/remotesensingsatellite/polar-s" + "wath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc'\n" + "Passed: URL Check on 'https://thredds.met.no/thredds/wms/remotesensingsatellite/polar-swa" + "th/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc?service=WMS&version=1.3.0&r" + "equest=GetCapabilities'\n" + "Passed: URL Check on 'https://thredds.met.no/thredds/fileServer/remotesensingsatellite/po" + "lar-swath/2021/04/29/aqua-modis-1km-20210429002844-20210429003955.nc'\n" + "Passed: URL Check on 'https://www.wmo-sat.info/oscar/satellites/view/aqua'\n" + "Passed: URL Check on 'https://www.wmo-sat.info/oscar/instruments/view/modis'\n" + "Passed: Rectangle Check\n" + "Passed: Controlled Vocabularies Check\n" + ).rstrip() # Full check with no elements to check - caplog.clear() - assert full_check(etree.ElementTree(etree.XML(""))) is True - assert "Found no elements contained an URL" in caplog.text - assert "Found no geographic_extent/rectangle element" in caplog.text + assert chkMMD.full_check(etree.ElementTree(etree.XML(""))) is True + ok, 
passed, failed = chkMMD.status() + assert ok is True + assert passed == [] + assert failed == [] # Full check with invalid elements etreeUrlRectNok = etree.ElementTree(etree.XML( @@ -186,30 +380,20 @@ def testMMDTools_FullCheck(filesDir, caplog): " NotOpen" "" )) - caplog.clear() - assert full_check(etreeUrlRectNok) is False - assert "NOK: URLs" in caplog.text - assert "NOK: geographic_extent/rectangle" in caplog.text - - # Twice the element keywords for the same vocabulary - # root = etree.Element("toto") - # key1 = etree.SubElement(root, "keywords", vocabulary="Climate and Forecast Standard Names") - # etree.SubElement(key1, "keyword").text = "air_temperature" - # key2 = etree.SubElement(root, "keywords", vocabulary="Climate and Forecast Standard Names") - # etree.SubElement(key2, "keyword").text = "air_temperature" - # assert full_check(root) is False - - # Correct case - # root = etree.Element("toto") - # root1 = etree.SubElement(root, "keywords", vocabulary="Climate and Forecast Standard Names") - # etree.SubElement(root1, "keyword").text = "sea_surface_temperature" - # assert full_check(root) is True - - # Two standard names provided - # root = etree.Element("toto") - # root1 = etree.SubElement(root, "keywords", vocabulary="Climate and Forecast Standard Names") - # etree.SubElement(root1, "keyword").text = "air_temperature" - # etree.SubElement(root1, "keyword").text = "sea_surface_temperature" - # assert full_check(root) is False + assert chkMMD.full_check(etreeUrlRectNok) is False + ok, passed, failed = chkMMD.status() + assert ok is False + assert passed == [] + assert "\n".join(failed) == ( + "Failed: URL Check on 'https://www.mæt.no/'\n" + " - URL contains non-ASCII characters.\n" + "Failed: Rectangle Check\n" + " - Missing rectangle element 'east'.\n" + "Failed: Climate and Forecast Standard Names Check\n" + " - Only one CF name should be provided, got 2.\n" + " - Keyword 'air_surface_temperature' is not a CF standard name.\n" + "Failed: Controlled 
Vocabularies Check\n" + " - Incorrect vocabulary 'NotOpen' for element 'operational_status'.\n" + ).rstrip() # END Test testMMDTools_FullCheck