Skip to content

Commit

Permalink
Merge pull request #144 from nansencenter/upgrade_earthdata_normalizer
Browse files Browse the repository at this point in the history
Make Earthdata CMR normalizer more generic
  • Loading branch information
aperrin66 authored Apr 17, 2024
2 parents aa68a1d + a469ab4 commit ad109d6
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 12 deletions.
25 changes: 17 additions & 8 deletions metanorm/normalizers/geospaas/earthdata_cmr.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,27 @@ def get_entry_title(self, raw_metadata):

@utils.raises((KeyError, IndexError))
def get_entry_id(self, raw_metadata):
return raw_metadata['umm']['DataGranule']['Identifiers'][0]['Identifier'].rstrip('.nc')
try:
return raw_metadata['umm']['DataGranule']['Identifiers'][0]['Identifier'].rstrip('.nc')
except KeyError:
return raw_metadata['umm']['GranuleUR']

@utils.raises((KeyError, IndexError))
def get_summary(self, raw_metadata):
summary_fields = {}
description = ''
umm = raw_metadata['umm']

for platform in umm['Platforms']:
description += (
f"Platform={platform['ShortName']}, " +
', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments']))
try:
for platform in umm['Platforms']:
description += (
f"Platform={platform['ShortName']}, " +
', '.join(f"Instrument={i['ShortName']}" for i in platform['Instruments']) +
', ')
except KeyError:
pass
description += f"Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}"

description += (
f", Start date={umm['TemporalExtent']['RangeDateTime']['BeginningDateTime']}")
summary_fields[utils.SUMMARY_FIELDS['description']] = description

processing_level_match = re.match(
Expand All @@ -66,7 +72,10 @@ def get_platform(self, raw_metadata):
"""Only get the first platform from the raw metadata, because
GeoSPaaS does not support more than one platform per dataset
"""
return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName'])
try:
return utils.get_gcmd_platform(raw_metadata['umm']['Platforms'][0]['ShortName'])
except KeyError:
return utils.get_gcmd_platform(utils.UNKNOWN)

@utils.raises((KeyError, IndexError))
def get_instrument(self, raw_metadata):
Expand Down
1 change: 1 addition & 0 deletions metanorm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ def export_subclasses(package__all__, package_name, package_dir, base_class):
# providers
'ESA/EO': ('ESA',),
'OB.DAAC': ('OB_DAAC',),
'NASA/JPL/PODAAC': ('POCLOUD',),
'C-SAR': ('SAR-C', 'SAR-C SAR'),
'EUMETSAT/OSISAF': ('EUMETSAT OSI SAF',),
'NSIDC': ('NSIDC_ECS',),
Expand Down
40 changes: 36 additions & 4 deletions tests/normalizers/test_earthdata_cmr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Tests for the Earthdata CMR metadata normalizer"""
import unittest
import unittest.mock as mock
from collections import OrderedDict
from datetime import datetime

from dateutil.tz import tzutc
Expand Down Expand Up @@ -38,6 +39,11 @@ def test_entry_id(self):
}
self.assertEqual(self.normalizer.get_entry_id(attributes), 'V2020245000600.L2_SNPP_OC')

def test_entry_id_from_granuleUR(self):
"""Test getting the ID from the GranuleUR field"""
attributes = {'umm': {'GranuleUR': 'foo'}}
self.assertEqual(self.normalizer.get_entry_id(attributes), 'foo')

def test_entry_id_missing_attribute(self):
"""A MetadataNormalizationError must be raised if the raw
attribute is missing
Expand Down Expand Up @@ -109,6 +115,27 @@ def test_summary(self):
'Description: Platform=SUOMI-NPP, ' +
'Instrument=VIIRS, Start date=2020-09-01T00:06:00Z')

def test_summary_no_platform(self):
"""Test getting a summary when no platform info is available
"""
attributes = {
"umm": {
"TemporalExtent": {
"RangeDateTime": {
"BeginningDateTime": "2020-09-01T00:06:00Z",
"EndingDateTime": "2020-09-01T00:11:59Z"
}
},
"CollectionReference": {
"ShortName": "VIIRSN_L2_OC",
"Version": "2018"
}
}
}
self.assertEqual(
self.normalizer.get_summary(attributes),
'Description: Start date=2020-09-01T00:06:00Z;Processing level: 2')

def test_summary_missing_attribute(self):
"""A MetadataNormalizationError must be raised if the raw
attribute is missing
Expand Down Expand Up @@ -189,10 +216,15 @@ def test_platform_missing_attribute(self):
"""The unknown platform must be returned if the raw
attribute is missing
"""
with self.assertRaises(MetadataNormalizationError):
self.normalizer.get_platform({})
with self.assertRaises(MetadataNormalizationError):
self.normalizer.get_platform({'umm': {'foo': 'bar'}})
unknown_platform = OrderedDict([
('Category', 'Unknown'),
('Series_Entity', 'Unknown'),
('Short_Name', 'Unknown'),
('Long_Name', 'Unknown')
])
self.assertDictEqual(self.normalizer.get_platform({}), unknown_platform)
self.assertDictEqual(self.normalizer.get_platform({'umm': {'foo': 'bar'}}),
unknown_platform)

def test_instrument(self):
"""Test getting the instrument"""
Expand Down

0 comments on commit ad109d6

Please sign in to comment.