From b75ae2f3b3563878382e202b36c5ae6668434b99 Mon Sep 17 00:00:00 2001 From: Binita Date: Mon, 4 Nov 2024 15:32:27 -0600 Subject: [PATCH 1/3] added new check for opendap for umm-g and echo-g --- pyQuARC/code/custom_validator.py | 50 +++++++++++++++++++++++++++++ pyQuARC/schemas/check_messages.json | 8 +++++ pyQuARC/schemas/checks.json | 5 +++ pyQuARC/schemas/rule_mapping.json | 28 ++++++++++++++++ 4 files changed, 91 insertions(+) diff --git a/pyQuARC/code/custom_validator.py b/pyQuARC/code/custom_validator.py index bf3620d1..1973ca75 100644 --- a/pyQuARC/code/custom_validator.py +++ b/pyQuARC/code/custom_validator.py @@ -277,3 +277,53 @@ def count_check(count, values, key): items = [items] num_items = len(items) return {"valid": int(count) == num_items, "value": (count, num_items)} + + @staticmethod + def opendap_link_check(related_urls, key, extra=None): + """ + Checks if the related_urls contains an OPeNDAP link with the type "OPENDAP DATA" or URL containing "opendap". + + Args: + related_urls (list): The related_urls field of the object, expected to be a list of URL objects. + key (dict): A dictionary with "type" and "url_keyword" keys for the checks. + extra (optional): An additional argument to match the expected function call signature. This argument is ignored. + + Returns: + dict: A validation result indicating whether a valid OPeNDAP link is present and the link itself if found. 
+ """ + + # If related_urls is None or not provided, initialize it as an empty list + if not related_urls: + related_urls = [] + + # If related_urls is not a list, assume it's a single URL string and wrap it in a list of one dictionary + elif isinstance(related_urls, str): + related_urls = [{"URL": related_urls, "Type": key.get("type", "OPENDAP DATA")}] + + # Default return object if no valid OPeNDAP link is found + return_obj = { + "valid": False, + "value": "None" + } + + # Extract type and keyword from key for clearer conditions + type_to_check = key.get("type", "OPENDAP DATA").upper() + url_keyword = key.get("url_keyword", "opendap").lower() + + # Process each URL object in the list + for url_obj in related_urls: + # Ensure that url_obj is a dictionary before accessing its fields + if not isinstance(url_obj, dict): + continue + + # Check for "opendap" in the URL + url_value = url_obj.get("URL", "").lower() + type_field = url_obj.get("Type", "").upper() + + # Check if the URL contains "opendap" or if the Type matches "OPENDAP DATA" + if url_keyword in url_value or type_field == type_to_check: + return_obj["valid"] = True + return_obj["value"] = url_obj.get("URL", "None") + break + + return return_obj diff --git a/pyQuARC/schemas/check_messages.json b/pyQuARC/schemas/check_messages.json index 0b8b38c8..18bde23f 100644 --- a/pyQuARC/schemas/check_messages.json +++ b/pyQuARC/schemas/check_messages.json @@ -1070,5 +1070,13 @@ "url": "https://wiki.earthdata.nasa.gov/display/CMR/Spatial+Extent" }, "remediation": "Recommend providing the horizontal pixel resolution, if applicable. If provided, this information will be indexed in the EDSC 'Horizontal Data Resolution' search facet which allows users to search by spatial resolution." + }, + "opendap_link_check": { + "failure": "No OPeNDAP URL is provided in the granule fields. 
An OPeNDAP link is recommended for data access.", + "help": { + "message": "OPeNDAP links allow for direct data access through the OPeNDAP protocol.", + "url": "https://wiki.earthdata.nasa.gov/display/CMR/Related+URLs" + }, + "remediation": "Recommend providing an OPeNDAP link in the granule's Online Resources or Related URLs fields for enhanced data accessibility." } } \ No newline at end of file diff --git a/pyQuARC/schemas/checks.json b/pyQuARC/schemas/checks.json index 778f4da3..ef303aa6 100644 --- a/pyQuARC/schemas/checks.json +++ b/pyQuARC/schemas/checks.json @@ -298,5 +298,10 @@ "data_type": "custom", "check_function": "count_check", "available": true + }, + "opendap_link_check": { + "data_type": "custom", + "check_function": "opendap_link_check", + "available": true } } diff --git a/pyQuARC/schemas/rule_mapping.json b/pyQuARC/schemas/rule_mapping.json index 2e3acc41..b6aab68e 100644 --- a/pyQuARC/schemas/rule_mapping.json +++ b/pyQuARC/schemas/rule_mapping.json @@ -3745,6 +3745,34 @@ "severity": "error", "check_id": "string_compare" }, + "opendap_link_check": { + "rule_name": "OPeNDAP Link Presence Check", + "fields_to_apply": { + "echo-g": [ + { + "fields": [ + "Granule/OnlineResources/OnlineResource/URL" + ] + } + ], + "umm-g": [ + { + "fields": [ + "RelatedURLs/URL" + ] + } + ] + }, + "data": [ + { + "type": "OPENDAP DATA", + "url_keyword": "opendap" + } + ], + "relation": "contains", + "check_id": "opendap_link_check", + "severity": "warning" + }, "location_keyword_presence_check": { "rule_name": "Location Keyword Presence Check", "fields_to_apply": { From 50e8082ec5c3f692ebc14ccb3e6dcf4d1670972d Mon Sep 17 00:00:00 2001 From: Binita Date: Tue, 5 Nov 2024 15:15:53 -0600 Subject: [PATCH 2/3] changes on rule_mapping for opendap --- pyQuARC/code/custom_validator.py | 17 +++++++++-------- pyQuARC/schemas/rule_mapping.json | 4 ++-- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pyQuARC/code/custom_validator.py b/pyQuARC/code/custom_validator.py 
index 1973ca75..6becf37d 100644 --- a/pyQuARC/code/custom_validator.py +++ b/pyQuARC/code/custom_validator.py @@ -281,7 +281,7 @@ def count_check(count, values, key): @staticmethod def opendap_link_check(related_urls, key, extra=None): """ - Checks if the related_urls contains an OPeNDAP link with the type "OPENDAP DATA" or URL containing "opendap". + Checks if the related_urls contains an OPeNDAP link by looking for "opendap" in the URL or matching Type/Subtype fields. Args: related_urls (list): The related_urls field of the object, expected to be a list of URL objects. @@ -296,9 +296,9 @@ def opendap_link_check(related_urls, key, extra=None): if not related_urls: related_urls = [] - # If related_urls is not a list, assume it's a single URL string and wrap it in a list of one dictionary + # If related_urls is a string, wrap it in a list as a single URL dictionary without setting Type elif isinstance(related_urls, str): - related_urls = [{"URL": related_urls, "Type": key.get("type", "OPENDAP DATA")}] + related_urls = [{"URL": related_urls}] # Default return object if no valid OPeNDAP link is found return_obj = { @@ -306,9 +306,9 @@ def opendap_link_check(related_urls, key, extra=None): "value": "None" } - # Extract type and keyword from key for clearer conditions - type_to_check = key.get("type", "OPENDAP DATA").upper() + # Extract URL keyword and type to check from key url_keyword = key.get("url_keyword", "opendap").lower() + type_to_check = key.get("type", "OPENDAP DATA").upper() # Process each URL object in the list for url_obj in related_urls: @@ -316,12 +316,13 @@ def opendap_link_check(related_urls, key, extra=None): if not isinstance(url_obj, dict): continue - # Check for "opendap" in the URL + # Retrieve URL, Type, and Subtype fields from each URL object url_value = url_obj.get("URL", "").lower() type_field = url_obj.get("Type", "").upper() + subtype_field = url_obj.get("Subtype", "").upper() - # Check if the URL contains "opendap" or if the Type matches 
"OPENDAP DATA" - if url_keyword in url_value or type_field == type_to_check: + # Check if any of the conditions is met: URL contains "opendap", Type is "OPENDAP DATA", or Subtype contains "OPENDAP DATA" + if url_keyword in url_value or type_to_check == type_field or type_to_check in subtype_field: return_obj["valid"] = True return_obj["value"] = url_obj.get("URL", "None") break diff --git a/pyQuARC/schemas/rule_mapping.json b/pyQuARC/schemas/rule_mapping.json index b6aab68e..054df11e 100644 --- a/pyQuARC/schemas/rule_mapping.json +++ b/pyQuARC/schemas/rule_mapping.json @@ -3751,14 +3751,14 @@ "echo-g": [ { "fields": [ - "Granule/OnlineResources/OnlineResource/URL" + "Granule/OnlineResources/OnlineResource" ] } ], "umm-g": [ { "fields": [ - "RelatedURLs/URL" + "RelatedUrls" ] } ] From 693965c023c0739b53cd6ae630b3228bcae7cfb6 Mon Sep 17 00:00:00 2001 From: Binita Date: Wed, 6 Nov 2024 09:38:58 -0600 Subject: [PATCH 3/3] make sure it works on dict or orderdict --- pyQuARC/code/custom_validator.py | 40 ++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/pyQuARC/code/custom_validator.py b/pyQuARC/code/custom_validator.py index 6becf37d..ab789d3f 100644 --- a/pyQuARC/code/custom_validator.py +++ b/pyQuARC/code/custom_validator.py @@ -2,6 +2,7 @@ from .string_validator import StringValidator from .utils import cmr_request, if_arg, set_cmr_prms +from collections.abc import Mapping class CustomValidator(BaseValidator): @@ -281,10 +282,13 @@ def count_check(count, values, key): @staticmethod def opendap_link_check(related_urls, key, extra=None): """ - Checks if the related_urls contains an OPeNDAP link by looking for "opendap" in the URL or matching Type/Subtype fields. + Checks if the related_urls contains an OPeNDAP link by looking for "opendap" in the URL + or matching Type/Subtype fields. This function works with both OrderedDict and regular dict, + as well as a list of dictionaries. 
Args: - related_urls (list): The related_urls field of the object, expected to be a list of URL objects. + related_urls (list or Mapping): The related_urls field of the object, expected to be a list of URL objects + or a single OrderedDict. key (dict): A dictionary with "type" and "url_keyword" keys for the checks. extra (optional): An additional argument to match the expected function call signature. This argument is ignored. @@ -296,15 +300,12 @@ def opendap_link_check(related_urls, key, extra=None): if not related_urls: related_urls = [] - # If related_urls is a string, wrap it in a list as a single URL dictionary without setting Type - elif isinstance(related_urls, str): - related_urls = [{"URL": related_urls}] + # If related_urls is a single Mapping (like OrderedDict), wrap it in a list + elif isinstance(related_urls, Mapping): + related_urls = [related_urls] # Default return object if no valid OPeNDAP link is found - return_obj = { - "valid": False, - "value": "None" - } + return_obj = {"valid": False, "value": "None"} # Extract URL keyword and type to check from key url_keyword = key.get("url_keyword", "opendap").lower() @@ -312,19 +313,28 @@ def opendap_link_check(related_urls, key, extra=None): # Process each URL object in the list for url_obj in related_urls: - # Ensure that url_obj is a dictionary before accessing its fields - if not isinstance(url_obj, dict): + # Ensure that url_obj is a dictionary-like object before processing + if not isinstance(url_obj, Mapping): continue - # Retrieve URL, Type, and Subtype fields from each URL object + # Retrieve the URL field url_value = url_obj.get("URL", "").lower() + + # Check if the URL contains "opendap" + if "opendap" in url_value: + return_obj["valid"] = True + return_obj["value"] = url_value + break + + # Retrieve and normalize Type and Subtype fields type_field = url_obj.get("Type", "").upper() subtype_field = url_obj.get("Subtype", "").upper() - # Check if any of the conditions is met: URL contains 
"opendap", Type is "OPENDAP DATA", or Subtype contains "OPENDAP DATA" - if url_keyword in url_value or type_to_check == type_field or type_to_check in subtype_field: + # Check if the Type or Subtype contains "OPENDAP DATA" + if type_to_check in type_field or type_to_check in subtype_field: return_obj["valid"] = True - return_obj["value"] = url_obj.get("URL", "None") + return_obj["value"] = url_value if url_value else "None" break return return_obj +